---
title: "Health & Wealth in India NFHS-4"
author: "Lara Jung"
date: "3/23/2018"
output: html_document
---

# Data Cleaning and Merging
Load packages
```{r load packages}


library(tidyverse) 
library(dplyr) 
library(forcats) # for categorical variables (R for data science rec) --> see https://rdrr.io/cran/forcats/man/fct_unify.html
library(haven) # for read_dta/read_csv, Translate value labels into a new labelled() class, which preserves the original semantics and can easily be coerced to factors with as_factor()
library(ggplot2) #plot data
library(ggrepel)# to avoid text labels in ggplot from overlapping
library(modelr)# to use "add_predictions()" for adding a column of predicted vals to your dataset
library(broom)# to create tidy data from model output
#library(broom.mixed) #to extract coefficents (for mixed models)
#library(lme4) # for multi-level modeling
#library(lmerTest) # for p-values with the lmer command
library(srvyr) # survey package that also works with dplyr 
library(speedglm) #glm analyssis
library(arm)#bayesglm
library(readstata13) #read dta datasets
library(tableone) # Creates a table 1 (summary characteristics)




```

The code chunk below takes the DHS datasets that were cleaned in the "DHS_cleaning" file (and that contain sampling weights), cleans them further, and drops irrelevant columns to yield a dataset for analysis.

```{r Cleaning and merging data, eval=FALSE}




# Read the cleaned DHS extract, forcing three columns to be parsed as numbers,
# and keep only the variables that are used in the analysis below.
# dplyr::select is namespaced explicitly (other loaded packages may also
# export a function called select).
India_DHS <- read_csv(
  "India_DHS_06_07_19.csv",
  col_types = cols(
    asset_index_rural = col_number(),
    smb51 = col_number(),
    smb52 = col_number()
  )
) %>%
  as_tibble() %>%
  # 1. select variables for analysis
  dplyr::select(
    p_id, hh_id, ex_state_ind, ex_d_name_ind, d_id,
    ex_glucose_ind, fast, hbg12,
    sbp_avg, dbp_avg, htn_know, htn_treatment,
    bmi, csmoke,
    age_5yr, age, urban, sex,
    educat_lcl, educat_lcl_new, literacy,
    wealth_quintile_rurb, asset_index_combined,
    bp_ms, fast_variable_drink, fast_variable_eat
  )



#2. district vars corrections#################
#correct district names
# 2. District name corrections.
# For the district ids listed below the name is overwritten; several of them
# receive a state suffix so that same-named districts in different states stay
# distinguishable. Every other row keeps its existing name, and a missing d_id
# yields a missing name. The column is turned back into a factor at the end.
India_DHS <- India_DHS %>%
  mutate(
    ex_d_name_ind = as.character(ex_d_name_ind),
    ex_d_name_ind = dplyr::case_when(
      is.na(d_id) ~ NA_character_,

      d_id == "13_25" ~ "Lahul&Spiti",
      d_id == "33_153" ~ "Lakhimpur Kheri",
      d_id == "33_187" ~ "Maharajganj",

      # ids 25_90 to 25_98
      d_id == "25_90" ~ "North West Dheli",
      d_id == "25_91" ~ "North Delhi",
      d_id == "25_92" ~ "North East Dheli",
      d_id == "25_93" ~ "East Dheli",
      d_id == "25_94" ~ "New Dheli",
      d_id == "25_95" ~ "Central Dheli",
      d_id == "25_96" ~ "West Dheli",
      d_id == "25_97" ~ "South West Dheli",
      d_id == "25_98" ~ "South Dheli",

      # ids 30_241 to 30_244
      d_id == "30_241" ~ "North Sikkim",
      d_id == "30_242" ~ "West Sikkim",
      d_id == "30_243" ~ "South Sikkim",
      d_id == "30_244" ~ "East Sikkim",

      # names that occur in more than one state
      d_id == "13_28" ~ "Hamirpur (Himachal Pradesh)",
      d_id == "33_168" ~ "Hamirpur (Uttar Pradesh)",
      d_id == "7_406" ~ "Bilaspur (Chhattisgarh)",
      d_id == "13_30" ~ "Bilaspur (Himachal Pradesh)",
      d_id == "29_131" ~ "Pratapgarh (Rajasthan)",
      d_id == "33_173" ~ "Pratapgarh (Uttar Pradesh)",
      d_id == "20_515" ~ "Aurangabad (Maharashtra)",
      d_id == "5_235" ~ "Aurangabad (Bihar)",
      d_id == "7_403" ~ "Raigarh (Chhattisgarh)",
      d_id == "20_520" ~ "Raigarh (Maharashtra)",
      d_id == "7_417" ~ "Bijapur (Chhattisgarh)",
      d_id == "16_557" ~ "Bijapur (Karnataka)",

      # all remaining districts keep their current name
      TRUE ~ ex_d_name_ind
    ),
    ex_d_name_ind = as.factor(ex_d_name_ind)
  )

# check: list the distinct district names after correction
unique(India_DHS$ex_d_name_ind)
```



# Create variables for analysis

```{r Define CVD risk factor/ outcome variables}

# 1. diabetes ######################

#correct fast definition

# Recompute the fasting indicator from the two fasting-duration variables:
#   NA - either variable is missing, or drink > 95, or eat > 48
#   1  - both variables are >= 8
#   0  - otherwise
India_DHS <- India_DHS %>%
  mutate(
    fast = dplyr::case_when(
      is.na(fast_variable_drink) | is.na(fast_variable_eat) ~ NA_real_,
      fast_variable_drink > 95 | fast_variable_eat > 48 ~ NA_real_,
      fast_variable_drink >= 8 & fast_variable_eat >= 8 ~ 1,
      TRUE ~ 0
    )
  )

# check: distribution of the recomputed indicator
summary(as.factor(India_DHS$fast))

#High blood sugar definition
# High blood sugar (narrow definition):
#   1 if fasting (fast == 1) and glucose >= 126, or
#        not fasting (fast == 0) and glucose >= 200; 0 otherwise.
#   NA when glucose or the fasting indicator is missing.
# Stored both as a factor and as a numeric (_dbl) copy.
India_DHS <- India_DHS %>%
  mutate(
    ex_diab_narrow_ind = dplyr::case_when(
      is.na(ex_glucose_ind) | is.na(fast) ~ NA_real_,
      (fast == 1 & ex_glucose_ind >= 126) |
        (fast == 0 & ex_glucose_ind >= 200) ~ 1,
      TRUE ~ 0
    ),
    ex_diab_narrow_ind = as.factor(ex_diab_narrow_ind),
    ex_diab_narrow_ind_dbl = as.numeric(as.character(ex_diab_narrow_ind))
  )

# check (original author's note: 23728 missings)
summary(India_DHS$ex_diab_narrow_ind)

# Broad definition: additionally counts hbg12 == 1 as positive.
# NA when the narrow indicator or hbg12 is missing.
India_DHS <- India_DHS %>%
  mutate(
    ex_diab_broad_ind = dplyr::case_when(
      is.na(ex_diab_narrow_ind) | is.na(hbg12) ~ NA_real_,
      hbg12 == 1 | ex_diab_narrow_ind == 1 ~ 1,
      TRUE ~ 0
    ),
    ex_diab_broad_ind = as.factor(ex_diab_broad_ind),
    ex_diab_broad_ind_dbl = as.numeric(as.character(ex_diab_broad_ind))
  )

# check (original author's note: 11471 "don't know" answers in hbg12)
summary(as.factor(India_DHS$hbg12))
# check (original author's note: 23728 missings)
summary(India_DHS$ex_diab_broad_ind)



# 2. hypertension ######################

#high blood pressure definition

# High blood pressure (narrow definition):
#   1 if sbp_avg >= 140 or dbp_avg >= 90, 0 otherwise,
#   NA when either average is missing.
India_DHS <- India_DHS %>%
  mutate(
    ex_htn_narrow_ind = dplyr::case_when(
      is.na(sbp_avg) | is.na(dbp_avg) ~ NA_real_,
      sbp_avg >= 140 | dbp_avg >= 90 ~ 1,
      TRUE ~ 0
    )
  )

# check (original author's note: 15015 missings)
summary(as.factor(India_DHS$ex_htn_narrow_ind))

# Implausible readings (diastolic above systolic) are set to missing.
# The index is FALSE whenever a reading is missing, so those rows simply keep
# the NA they already have.
India_DHS <- India_DHS %>%
  mutate(
    ex_htn_narrow_ind = replace(
      ex_htn_narrow_ind,
      !is.na(sbp_avg) & !is.na(dbp_avg) & dbp_avg > sbp_avg,
      NA
    )
  )

# check (original author's note: 15044 missings)
summary(as.factor(India_DHS$ex_htn_narrow_ind))

# Factor version plus numeric (_dbl) copy of the narrow indicator, followed by
# the broad definition, which additionally counts htn_treatment == 1 or
# htn_know == 1 as positive and is NA if any of the three inputs is missing.
India_DHS <- India_DHS %>%
  mutate(
    ex_htn_narrow_ind = as.factor(ex_htn_narrow_ind),
    ex_htn_narrow_ind_dbl = as.numeric(as.character(ex_htn_narrow_ind)),
    ex_htn_broad_ind = dplyr::case_when(
      is.na(ex_htn_narrow_ind) | is.na(htn_treatment) | is.na(htn_know) ~ NA_real_,
      htn_treatment == 1 | htn_know == 1 | ex_htn_narrow_ind == 1 ~ 1,
      TRUE ~ 0
    ),
    ex_htn_broad_ind = as.factor(ex_htn_broad_ind),
    ex_htn_broad_ind_dbl = as.numeric(as.character(ex_htn_broad_ind))
  )

# check (original author's note: 15069 missings)
summary(as.factor(India_DHS$ex_htn_broad_ind))


# 3. bmi ######################
######################## Create bmi group


# BMI groups: <18.5, 18.5-<23, 23-<25, 25-<30, >=30 (NA when bmi is missing).
#
# cut() with right = FALSE builds left-closed intervals [a, b), which is the
# same classification as the chained "bmi >= a & bmi < b" comparisons used
# before. include.lowest = TRUE closes the last interval on the right so that
# every non-missing value of 30 or more lands in ">=30".
#
# The levels are given explicitly in ascending BMI order. Creating the factor
# with as.factor() on the label strings sorts the levels by the session's
# collation rules, so the category order in tables depended on the locale.
India_DHS <- dplyr::mutate(
  India_DHS,
  bmi_grp = cut(
    bmi,
    breaks = c(-Inf, 18.5, 23, 25, 30, Inf),
    labels = c("<18.5", "18.5-<23", "23-<25", "25-<30", ">=30"),
    right = FALSE,
    include.lowest = TRUE
  )
)

# "<18.5" is already the first level; relevel() keeps the reference category
# explicit for anything that uses bmi_grp as a predictor.
India_DHS <- within(India_DHS, bmi_grp <- relevel(bmi_grp, ref = "<18.5"))

################## Detailed BMI groups
# Binary indicator for bmi >= 27.5 (1 = yes, 0 = no, NA when bmi is missing),
# stored as a factor with reference level "0" and as a numeric (_dbl) copy.
India_DHS <- mutate(
  India_DHS,
  bmigrt27.5 = as.factor(as.numeric(bmi >= 27.5)),
  bmigrt27.5 = relevel(bmigrt27.5, ref = "0"),
  bmigrt27.5_dbl = as.numeric(as.character(bmigrt27.5))
)

# check (original author's note: 14411 missings)
summary(India_DHS$bmigrt27.5)
#14411 missings

# 4. currently smoking ######################
#currently smoking:defined as smoking cigarettes pipes,cigars hookah,bidis according to new created variable csmoke_new (see "loas and merge additional variables...")

# Current smoking: factor version of csmoke plus a numeric (_dbl) copy.
India_DHS <- India_DHS %>%
  mutate(
    csmoke = as.factor(csmoke),
    csmoke_dbl = as.numeric(as.character(csmoke))
  )

# check
summary(India_DHS$csmoke)
#no missings


```





```{r  create Individual-level dependent (level 1) variables}

#1. 5 year age groups####################

# Label the 5-year age bands. Codes 1 to 7 map to "15-19" ... "45-49"; any
# other non-missing code is labelled "50-54"; a missing code stays missing.
India_DHS <- India_DHS %>%
  mutate(
    age_grp = dplyr::case_when(
      is.na(age_5yr) ~ NA_character_,
      age_5yr == 1 ~ "15-19",
      age_5yr == 2 ~ "20-24",
      age_5yr == 3 ~ "25-29",
      age_5yr == 4 ~ "30-34",
      age_5yr == 5 ~ "35-39",
      age_5yr == 6 ~ "40-44",
      age_5yr == 7 ~ "45-49",
      TRUE ~ "50-54"
    ),
    # explicit level order so the bands are sorted by age
    age_grp = factor(
      age_grp,
      levels = c(
        "15-19", "20-24", "25-29", "30-34",
        "35-39", "40-44", "45-49", "50-54"
      )
    )
  )

# youngest band is the reference category
India_DHS$age_grp <- relevel(India_DHS$age_grp, ref = "15-19")

# check
summary(India_DHS$age_grp)
#no missings


#2. urban/rural####################

#define rural
# rural is the complement of urban (urban == 1 -> 0, otherwise 1, NA stays NA).
# urban is then converted to a factor; numeric (_dbl) copies of both variables
# and a text label (urban_lab) for tables are added.
India_DHS <- India_DHS %>%
  mutate(
    rural = dplyr::if_else(urban == 1, 0, 1),
    urban = as.factor(urban),
    urban_dbl = as.numeric(as.character(urban)),
    rural_dbl = as.numeric(as.character(rural)),
    urban_lab = as.factor(dplyr::if_else(urban == 1, "urban", "rural"))
  )

# check
summary(India_DHS$urban)
#no missing


#3. sex####################

#adjust classes

# sex as a factor, a numeric (_dbl) copy, and a text label for tables
# (sex == 1 is labelled "female", every other non-missing value "male").
India_DHS <- India_DHS %>%
  mutate(
    sex = as.factor(sex),
    sex_dbl = as.numeric(as.character(sex)),
    sex_lab = as.factor(dplyr::if_else(sex == 1, "female", "male"))
  )

# check
summary(India_DHS$sex)
#no missing


#4. education####################


#education lab for table
# Education variables derived from educat_lcl (codes 1 to 6).
# match() returns the position of each code in 1:6 and NA for a missing or
# unexpected code, so such rows end up NA in all three derived variables.
#
# educat_lab - text label for tables
# ed_5       - five analysis categories:
#                1 no formal education or below primary (codes 1 and 2)
#                3 completed primary
#                4 some secondary
#                5 completed secondary
#                6 higher
# ed_4       - as ed_5, but codes 5 and 6 are merged into category 5
India_DHS <- India_DHS %>%
  mutate(
    educat_lab = c(
      "No formal education", "< Primary", "Primary",
      "<Secondary ", "Secondary", ">Secondary"
    )[match(educat_lcl, 1:6)],
    ed_5 = as.factor(as.character(c(1, 1, 3, 4, 5, 6)[match(educat_lcl, 1:6)])),
    ed_5 = relevel(ed_5, ref = "1")
  )

# check: source codes against the five-category version
summary(as.factor(India_DHS$educat_lcl))
summary(India_DHS$ed_5)

India_DHS <- India_DHS %>%
  mutate(
    ed_4 = as.factor(as.character(c(1, 1, 3, 4, 5, 5)[match(educat_lcl, 1:6)])),
    ed_4 = relevel(ed_4, ref = "1")
  )

# check: source codes against the four-category version
summary(as.factor(India_DHS$educat_lcl))
summary(India_DHS$ed_4)
#no missing



#5. district household wealth quintile####################

#adjust classes for wealth variables

# Purpose: assign every respondent a district-specific wealth quintile
# (hh_wealth_quintile_district) based on asset_index_combined.
# Districts in which both the urban and the rural share of respondents are at
# least 5% get quintiles computed separately within urban and rural strata;
# all other districts get quintiles computed over the whole district.
# NOTE(review): rows are respondents, so the quintile cut points are taken over
# respondents rather than over distinct households - confirm this is intended.

# make sure the asset index is numeric before ranking
India_DHS$asset_index_combined<-as.numeric(India_DHS$asset_index_combined)

# calculate the urban and rural share of respondents within each district;
# these shares decide which of the two quintile definitions applies

India_DHS <- mutate(India_DHS,urban_all=ifelse(is.na(urban)==T,NA,1))# 1 for everybody who has an observation for urban/rural (denominator)

India_DHS<- India_DHS %>%group_by(ex_d_name_ind)%>%mutate(count_nom_u=sum(urban_dbl,na.rm=TRUE), count_denom_u=sum(urban_all,na.rm=TRUE),count_nom_r=sum(rural_dbl,na.rm=TRUE), count_denom_r=sum(urban_all,na.rm=TRUE) )# per-district sums: urban count, rural count, and the shared denominator

India_DHS<- dplyr::mutate(India_DHS,urban_prop =(count_nom_u/count_denom_u)*100,rural_prop =(count_nom_r/count_denom_r)*100 )# shares expressed in percent
India_DHS<-India_DHS%>%ungroup()

# first pass: quintiles within district x urban/rural stratum, kept only for
# districts where both shares are >= 5%; everything else is NA for now
India_DHS<-India_DHS%>%group_by(ex_d_name_ind,urban)%>%mutate(
hh_wealth_quintile_district=
  ifelse(is.na(asset_index_combined)==T, NA,
         ifelse(urban_prop>=5 & rural_prop>=5,ntile(asset_index_combined,5),NA)))
# original author's note: urban/rural prop >= 5% for 608/640 districts
India_DHS<-India_DHS%>%ungroup()  

# second pass: for districts where the urban or the rural share is below 5%,
# quintiles are computed over the whole district; rows already filled by the
# first pass keep their value
India_DHS<-India_DHS%>%group_by(ex_d_name_ind)%>%mutate(hh_wealth_quintile_district=
  ifelse(is.na(asset_index_combined)==T, NA,             
          ifelse(urban_prop<5 | rural_prop<5 ,ntile(asset_index_combined,5),hh_wealth_quintile_district)))
                                                                 
India_DHS<-India_DHS%>%ungroup()   


# factor version with quintile 1 as the reference category
India_DHS$hh_wealth_quintile_district<-as.factor(India_DHS$hh_wealth_quintile_district)
India_DHS <- within(India_DHS, hh_wealth_quintile_district <- relevel(hh_wealth_quintile_district, ref = "1"))#factor variable

# numeric copy of the quintile (suffix _c)
India_DHS <- India_DHS %>% 
  mutate(hh_wealth_quintile_district_c = as.numeric(as.character(hh_wealth_quintile_district)))#dbl variable

#check
summary(India_DHS$hh_wealth_quintile_district)
#no missings

#create variable that combines bottom and top wealth quintiles 
#definition of groups

#1 poor (wealth quintile 1 & 2)
#2 medium richt (wealth quintile 3)
#3 rich ( wealth quintile 4 & 5)

# Collapse the district wealth quintiles into three groups. The stored codes
# are 0, 1 and 2:
#   0 poor        (quintiles 1 and 2)
#   1 medium rich (quintile 3)
#   2 rich        (quintiles 4 and 5)
# match() yields NA for a missing or unexpected quintile, so those rows are NA.
India_DHS <- India_DHS %>%
  mutate(
    hh_wealth_quintile_groups_district =
      c(0, 0, 1, 2, 2)[match(hh_wealth_quintile_district_c, 1:5)],
    hh_wealth_quintile_groups_district =
      as.factor(as.character(hh_wealth_quintile_groups_district)),
    # group 0 (poor) is the reference category
    hh_wealth_quintile_groups_district =
      relevel(hh_wealth_quintile_groups_district, ref = "0")
  )

# check
summary(India_DHS$hh_wealth_quintile_groups_district)
#no missings

#6. national household wealth quintile####################

# National wealth quintile as a factor.
India_DHS <- mutate(India_DHS, wealth_quintile_rurb = as.factor(wealth_quintile_rurb))

# check
summary(India_DHS$wealth_quintile_rurb)

# Numeric copy (suffix _c) and a three-group version. The stored group codes
# are 0, 1 and 2:
#   0 poor        (quintiles 1 and 2)
#   1 medium rich (quintile 3)
#   2 rich        (quintiles 4 and 5)
# match() yields NA for a missing or unexpected quintile, so those rows are NA.
India_DHS <- India_DHS %>%
  mutate(
    wealth_quintile_rurb_c = as.numeric(as.character(wealth_quintile_rurb)),
    wealth_quintile_rurb_groups =
      c(0, 0, 1, 2, 2)[match(wealth_quintile_rurb_c, 1:5)],
    wealth_quintile_rurb_groups =
      as.factor(as.character(wealth_quintile_rurb_groups)),
    # group 0 (poor) is the reference category
    wealth_quintile_rurb_groups =
      relevel(wealth_quintile_rurb_groups, ref = "0")
  )

# check
summary(India_DHS$wealth_quintile_rurb_groups)
#no missings


#7. Scaled asset index
# Standardise the asset index: subtract the overall mean and divide by the
# overall standard deviation (both computed with missing values removed).
India_DHS$asset_index_combined_s <- with(
  India_DHS,
  (asset_index_combined - mean(asset_index_combined, na.rm = TRUE)) /
    sd(asset_index_combined, na.rm = TRUE)
)

# check: the mean of the standardised index should be 0
summary(India_DHS$asset_index_combined_s)
#no missings, mean=0




```


# Filter & district sample size statistics
```{r filter households>100}

# Attach the number of rows per district (column n) and keep only districts
# with at least 100 rows.
India_DHS <- India_DHS %>%
  group_by(ex_d_name_ind) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  dplyr::filter(n >= 100)

# How many districts are still included? One row per district (the first row
# of each district), restricted to districts with n >= 100.
test <- India_DHS %>%
  distinct(ex_d_name_ind, .keep_all = TRUE) %>%
  dplyr::filter(n >= 100)
#640/640


```

```{r District-level indicators (level 2 variables)}

#1. Load GDP/Capita (PCI) (from planning comission) and merge#####################

# 1. Load the district-level per-capita income table (PCI) and merge it onto
#    the respondent data by district name.
PCI_districts <- read_csv("PCI_districts_NFHS4_08_19_clean.csv")

# A district that appears more than once in the lookup table would duplicate
# every respondent of that district in the join below, so flag it loudly.
if (anyDuplicated(PCI_districts$ex_d_name_ind) > 0) {
  warning(
    "PCI_districts contains duplicated ex_d_name_ind values; ",
    "the left join will duplicate respondent rows.",
    call. = FALSE
  )
}

India_DHS <- left_join(India_DHS, PCI_districts, by = "ex_d_name_ind")

India_DHS$PCI_districts <- as.numeric(India_DHS$PCI_districts)

# Check: respondent rows whose district name has no counterpart in the PCI
# table (anti_join keeps the rows of the first table without a match).
# Original author's note: 0 obs.
test <- anti_join(India_DHS, PCI_districts, by = "ex_d_name_ind")

# Which districts have a non-missing PCI value? One row per district, then
# drop the districts whose PCI value is missing.
# Original author's note: 476/640.
test <- India_DHS %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup()
test <- dplyr::filter(test, !is.na(PCI_districts))

# check
summary(India_DHS$PCI_districts)
#181274 NAs

#2. educational attainment and literacy##############################

########################### Female Literacy
######Refers to women who can read a whole sentence (v155) or part of a sentence and women who completed standard 6 or higher (v106 is 2 or 3; here educat_lcl_new) according to DHS forum and final report

###literacy definition
# Literacy definition: literate (1) if educat_lcl_new is 2 or 3, or literacy is
# 1 or 2; otherwise 0.
India_DHS <- India_DHS %>%
  mutate(
    literate = as.numeric(
      educat_lcl_new == 2 | educat_lcl_new == 3 | literacy == 1 | literacy == 2
    )
  )

# Numerator for the female literacy rate: 1 for literate respondents with
# sex == 1, 0 otherwise, NA when literacy status or sex is missing.
India_DHS <- India_DHS %>%
  mutate(
    edc_l = dplyr::case_when(
      is.na(literate) | is.na(sex) ~ NA_real_,
      literate == 1 & sex == 1 ~ 1,
      TRUE ~ 0
    )
  )

# check (original author's note: 0 missings, 436969 1's)
summary(as.factor(India_DHS$edc_l))

# Denominator: 1 for every respondent with sex == 1 who has a literacy
# observation, 0 otherwise.
India_DHS <- India_DHS %>%
  mutate(edc_l0 = as.numeric(!is.na(literate) & sex == 1))

# check (original author's note: 0 missings, 647451 1's, equal to the number
# of female participants)
summary(as.factor(India_DHS$edc_l0))

########################## percentage of men and women who completed at least primary education


# educational attainment:
# Educational attainment indicators:
#   edc_2_new - 1 if educat_lcl >= 3 (completed at least primary), 0 otherwise
#   edc_0_new - 1 for everybody with an education observation (denominator)
# Both are NA when educat_lcl is missing.
India_DHS <- India_DHS %>%
  mutate(
    edc_2_new = as.numeric(educat_lcl >= 3),
    edc_0_new = dplyr::if_else(is.na(educat_lcl), NA_real_, 1)
  )

# check: edc_2_new should equal the sum of education categories 3 to 6 and
# edc_0_new the number of participants
summary(as.factor(India_DHS$educat_lcl))
summary(as.factor(India_DHS$edc_2_new))
summary(as.factor(India_DHS$edc_0_new))

# District-level sums (unweighted) and the resulting percentages:
#   ed_att_new           - % of respondents who completed at least primary
#   literacy_rate_female - % of literate women among women with a literacy
#                          observation
India_DHS <- India_DHS %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_nom_new = sum(edc_2_new, na.rm = TRUE),
    count_denom_new = sum(edc_0_new, na.rm = TRUE),
    count_nom = sum(edc_l, na.rm = TRUE),
    count_denom = sum(edc_l0, na.rm = TRUE),
    literacy_rate_female = (count_nom / count_denom) * 100,
    ed_att_new = (count_nom_new / count_denom_new) * 100
  ) %>%
  ungroup()

# check
summary(India_DHS$literacy_rate_female)
summary(India_DHS$ed_att_new)


#3. urban/rural proportion##############################

#everybody who has an observation for urban/rural
# 1 for everybody who has an observation for urban/rural (denominator).
India_DHS <- India_DHS %>%
  mutate(urban_all = dplyr::if_else(is.na(urban), NA_real_, 1))

# check: should equal the number of participants
summary(as.factor(India_DHS$urban_all))

# Per-district counts of urban and rural respondents and the corresponding
# shares in percent.
India_DHS <- India_DHS %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_nom_u = sum(urban_dbl, na.rm = TRUE),
    count_denom_u = sum(urban_all, na.rm = TRUE),
    count_nom_r = sum(rural_dbl, na.rm = TRUE),
    count_denom_r = sum(urban_all, na.rm = TRUE),
    urban_prop = (count_nom_u / count_denom_u) * 100,
    rural_prop = (count_nom_r / count_denom_r) * 100
  ) %>%
  ungroup()

# check
summary(India_DHS$urban_prop)
#no missings




#4. median wealth##############################


#####seperate for urban and rural for district level regressions
# Median of the standardised asset index, attached to every respondent row.

# Separately for the urban and rural part of each district
# (for the district-level regressions).
by_district_stratum <- group_by(India_DHS, ex_d_name_ind, urban_lab)
India_DHS <- ungroup(
  mutate(
    by_district_stratum,
    medianai_r_u = median(asset_index_combined_s, na.rm = TRUE)
  )
)
rm(by_district_stratum)

# Over the whole district (for the multilevel regressions).
by_district <- group_by(India_DHS, ex_d_name_ind)
India_DHS <- ungroup(
  mutate(
    by_district,
    medianai = median(asset_index_combined_s, na.rm = TRUE)
  )
)
rm(by_district)

# check
summary(India_DHS$medianai)
summary(India_DHS$medianai_r_u)
#no missings



```










# Sample Characteristics


```{r TABLE 1, Table S3 & S4}

# Sample characteristics (Table 1, Tables S3 and S4).

# literate is summarised as a categorical variable
India_DHS <- mutate(India_DHS, literate = as.factor(literate))

# variables that are included in the tables
table1names <- c(
  "ex_diab_narrow_ind",
  "ex_diab_broad_ind",
  "ex_htn_narrow_ind",
  "ex_htn_broad_ind",
  "bmi_grp",
  "csmoke",
  "age_grp",
  "sex_lab",
  "urban_lab",
  "hh_wealth_quintile_district",
  "educat_lab",
  "bmigrt27.5",
  "literate",
  "age",
  "ed_5",
  "wealth_quintile_rurb"
)

# overall table
totalmiss <- CreateTableOne(
  vars = table1names,
  data = India_DHS,
  includeNA = FALSE
)
print(totalmiss)

# table stratified by sex
sexmiss <- CreateTableOne(
  vars = table1names,
  data = India_DHS,
  strata = "sex_lab",
  includeNA = FALSE
)
print(sexmiss)

# Export both tables as CSV so they can be copied into a document.
# printToggle = FALSE returns the formatted table without printing it.
DHS_total <- print(
  totalmiss,
  exact = "stage",
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)
DHS_sexmiss <- print(
  sexmiss,
  exact = "stage",
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)
write.csv(DHS_total, file = "DHS_total.csv")
write.csv(DHS_sexmiss, file = "DHS_sexmiss.csv")
#write.csv(tab3Mat, file = "myTable.csv")
```






# Discussion analyses


```{r CVD risk onto household wealth whole dataset + subset}

#Logistic regression of CVD risk factors onto household wealth with district-level fixed effects

# District is used as a fixed effect, so it has to be a factor.
India_DHS <- mutate(India_DHS, ex_d_name_ind = as.factor(ex_d_name_ind))

# Analysis copy with "Alirajpur" as the reference district, so that the
# reference is the same in every model.
dlevel_fixed_effects_analysis <- India_DHS
dlevel_fixed_effects_analysis$ex_d_name_ind <- relevel(
  dlevel_fixed_effects_analysis$ex_d_name_ind,
  ref = "Alirajpur"
)

# Subset: districts whose primary-school completion rate (ed_att_new) is below
# 56.44. The threshold was derived with the following (disabled) code:
#districts <-India_DHS%>%group_by(ex_d_name_ind)%>%dplyr::filter(row_number()==1)%>%ungroup
#quantile(districts$ed_att_new, .2,na.rm=TRUE)
#56.44
subset <- dlevel_fixed_effects_analysis %>%
  dplyr::filter(ed_att_new < 56.44) %>%
  mutate(ex_d_name_ind = relevel(ex_d_name_ind, ref = "Alirajpur"))




##### diabetes

##glm
#whole dataset
# Diabetes (broad definition) regressed on district wealth quintile with
# district fixed effects. Each model is fitted on the whole dataset and on the
# low-education subset; the tidied coefficients (with confidence intervals)
# are written to one CSV file per model.

# logistic model, whole dataset
mod_1 <- speedglm(
  ex_diab_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_1, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_1.csv")

# logistic model, subset
mod_1b <- speedglm(
  ex_diab_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = subset
)
coeffs <- tidy(mod_1b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_1b.csv")

# linear model, whole dataset (reuses the name mod_1)
mod_1 <- speedlm(
  ex_diab_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_1, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_1_lm.csv")

# linear model, subset (reuses the name mod_1b)
mod_1b <- speedlm(
  ex_diab_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = subset
)
coeffs <- tidy(mod_1b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_1b_lm.csv")




##### hypertension

##glm
#whole dataset
# Hypertension (broad definition) regressed on district wealth quintile with
# district fixed effects. Each model is fitted on the whole dataset and on the
# low-education subset; the tidied coefficients (with confidence intervals)
# are written to one CSV file per model.

# logistic model, whole dataset
mod_2 <- speedglm(
  ex_htn_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_2, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_2.csv")

# logistic model, subset
mod_2b <- speedglm(
  ex_htn_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = subset
)
coeffs <- tidy(mod_2b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_2b.csv")

# linear model, whole dataset (reuses the name mod_2)
mod_2 <- speedlm(
  ex_htn_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_2, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_2_lm.csv")

# linear model, subset (reuses the name mod_2b)
mod_2b <- speedlm(
  ex_htn_broad_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = subset
)
coeffs <- tidy(mod_2b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_2b_lm.csv")







##### bmi
##glm
#whole dataset
# BMI >= 27.5 regressed on district wealth quintile with district fixed
# effects. Each model is fitted on the whole dataset and on the low-education
# subset; the tidied coefficients (with confidence intervals) are written to
# one CSV file per model.

# logistic model, whole dataset
mod_3 <- speedglm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_3, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_3.csv")

# logistic model, subset
mod_3b <- speedglm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = subset
)
coeffs <- tidy(mod_3b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_3b.csv")

# linear model, whole dataset (reuses the name mod_3)
mod_3 <- speedlm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_3, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_3_lm.csv")

# linear model, subset (reuses the name mod_3b)
mod_3b <- speedlm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = subset
)
coeffs <- tidy(mod_3b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_3b_lm.csv")





```
```{r Household wealth on educational attainment}
#Ordinary least square regression of household wealth on educational attainment




# Linear regression of the numeric district wealth quintile on the
# five-category education variable; coefficients with confidence intervals are
# exported as CSV.
mod <- lm(
  hh_wealth_quintile_district_c ~ as.factor(ed_5),
  data = India_DHS
)
summary(mod)

coeffs <- tidy(mod, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DHS_mod_wealth_ed_lm.csv")




```
```{r Association of a district’s primary school completion rate with the difference in the continuous household wealth index between highest and lowest household wealth quintile}

#######Association of a district’s primary school completion rate with the difference in the continuous household wealth index between highest and lowest household wealth quintile

#### district wise 


# ---- District-wise analysis (urban and rural pooled) ----
# Flag respondents in the lowest (1) and highest (5) district wealth quintile,
# count both flags per district, and keep only districts with at least 20
# respondents in each of the two quintiles.
Ana_d <- India_DHS %>%
  mutate(
    wq_1_o = as.numeric(hh_wealth_quintile_district == 1),
    wq_5_o = as.numeric(hh_wealth_quintile_district == 5)
  ) %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  ) %>%
  dplyr::filter(count_wq_1_o >= 20 & count_wq_5_o >= 20) %>%
  ungroup()

# check numbers: one row per remaining district (original author's note: 640)
summarize(
  dplyr::filter(group_by(Ana_d, ex_d_name_ind), row_number() == 1),
  sum = n()
)

# Mean standardised asset index per district within quintile 1 and within
# quintile 5.
hwq1_df <- Ana_d %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  group_by(ex_d_name_ind) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))
hwq5_df <- Ana_d %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  group_by(ex_d_name_ind) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

# Attach both means, take their difference (quintile 5 minus quintile 1), and
# reduce the data to one observation per district.
analysis_wq_district <- Ana_d %>%
  left_join(hwq1_df, by = "ex_d_name_ind") %>%
  left_join(hwq5_df, by = "ex_d_name_ind") %>%
  mutate(diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts) %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1) %>%
  distinct(ex_d_name_ind, .keep_all = TRUE)

# lm analysis: wealth gap on primary-school completion rate
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = analysis_wq_district)
summary(mod)

# graph: scatter of the wealth gap against the completion rate with a
# least-squares line
fig <- ggplot(
  analysis_wq_district,
  aes(x = ed_att_new, y = diff_hwq5_hwq1)
) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 20 / 1, expand = FALSE)

print(fig)




#### district-wise stratified by urban residence

# ---- District-wise analysis stratified by urban/rural residence ----
# Same procedure as the pooled district analysis, but counts and means are
# taken within each district x urban/rural stratum; a stratum is kept only if
# it has at least 20 respondents in quintile 1 and in quintile 5.
Ana_d_ru <- India_DHS %>%
  mutate(
    wq_1_o = as.numeric(hh_wealth_quintile_district == 1),
    wq_5_o = as.numeric(hh_wealth_quintile_district == 5)
  ) %>%
  group_by(ex_d_name_ind, urban) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  ) %>%
  dplyr::filter(count_wq_1_o >= 20 & count_wq_5_o >= 20) %>%
  ungroup()

# check numbers: first row of every district that still has urban respondents
# (original author's note: 576)
dplyr::filter(
  dplyr::filter(group_by(Ana_d_ru, ex_d_name_ind), urban == 1),
  row_number() == 1
)

# check numbers: first row of every district that still has rural respondents
# (original author's note: 621)
dplyr::filter(
  dplyr::filter(group_by(Ana_d_ru, ex_d_name_ind), urban == 0),
  row_number() == 1
)

# Mean standardised asset index per district and stratum within quintile 1
# and within quintile 5.
hwq1_df <- Ana_d_ru %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  group_by(ex_d_name_ind, urban) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))
hwq5_df <- Ana_d_ru %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  group_by(ex_d_name_ind, urban) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

# Attach both means, take their difference (quintile 5 minus quintile 1), and
# reduce the data to one observation per district and stratum.
analysis_wq_district_r_u <- Ana_d_ru %>%
  left_join(hwq1_df, by = c("ex_d_name_ind", "urban")) %>%
  left_join(hwq5_df, by = c("ex_d_name_ind", "urban")) %>%
  mutate(diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts) %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1, urban, urban_lab) %>%
  distinct(ex_d_name_ind, urban, .keep_all = TRUE)

# lm analysis, urban strata
urban <- dplyr::filter(analysis_wq_district_r_u, urban == 1)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = urban)
summary(mod)

# lm analysis, rural strata
rural <- dplyr::filter(analysis_wq_district_r_u, urban == 0)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = rural)
summary(mod)

# graph: one panel per stratum, panel titles taken from `labels`
labels <- c(urban = "Urban", rural = "Rural")

fig <- ggplot(
  analysis_wq_district_r_u,
  aes(x = ed_att_new, y = diff_hwq5_hwq1)
) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  facet_wrap(~urban_lab, labeller = labeller(urban_lab = labels)) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.text.x = element_text(size = 20, face = "bold"),
    strip.text.y = element_text(size = 20, face = "bold"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 32 / 1, expand = FALSE)

print(fig)





#### national

# Drop districts with <20 observations in wealth quintile 1 or 5:
# flag quintile-1 / quintile-5 rows (NA where the quintile is missing),
# count the flags per district, and keep districts with >= 20 of each.
Ana_n <- India_DHS %>%
  mutate(
    wq_1_o = ifelse(
      is.na(wealth_quintile_rurb), NA,
      ifelse(wealth_quintile_rurb == 1, 1, 0)
    ),
    wq_5_o = ifelse(
      is.na(wealth_quintile_rurb), NA,
      ifelse(wealth_quintile_rurb == 5, 1, 0)
    )
  ) %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  ) %>%
  dplyr::filter(count_wq_1_o >= 20, count_wq_5_o >= 20) %>%
  ungroup()

## check numbers (the printed tibble has one row per remaining district)
Ana_n %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
#593 districts

# Per-district mean of asset_index_combined_s within wealth quintile 1 and
# within wealth quintile 5, then the difference between the two means.
hwq1_df <- Ana_n %>%
  dplyr::filter(wealth_quintile_rurb == 1) %>%
  group_by(ex_d_name_ind) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))

hwq5_df <- Ana_n %>%
  dplyr::filter(wealth_quintile_rurb == 5) %>%
  group_by(ex_d_name_ind) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

analysis_wq_national <- Ana_n %>%
  left_join(hwq1_df, by = "ex_d_name_ind") %>%
  left_join(hwq5_df, by = "ex_d_name_ind") %>%
  mutate(diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts)


# One observation per district: keep the first row of each district.
analysis_wq_national <- analysis_wq_national %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup()

# Linear model of the quintile-5-minus-quintile-1 difference on ed_att_new
# across all retained districts.
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = analysis_wq_national)
summary(mod)

# Graph: same relationship as jittered points with a least-squares line
# (no confidence band).
fig <- ggplot(
  analysis_wq_national,
  aes(x = ed_att_new, y = diff_hwq5_hwq1)
) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 20 / 1, expand = FALSE)


print(fig)



#### national stratified by urban residence

# Drop district x urban/rural strata with <20 observations in wealth
# quintile 1 or 5: flag the quintile rows (NA where the quintile is missing),
# count the flags per stratum, and keep strata with >= 20 of each.
Ana_n_ru <- India_DHS %>%
  mutate(
    wq_1_o = ifelse(
      is.na(wealth_quintile_rurb), NA,
      ifelse(wealth_quintile_rurb == 1, 1, 0)
    ),
    wq_5_o = ifelse(
      is.na(wealth_quintile_rurb), NA,
      ifelse(wealth_quintile_rurb == 5, 1, 0)
    )
  ) %>%
  group_by(ex_d_name_ind, urban) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  ) %>%
  dplyr::filter(count_wq_1_o >= 20, count_wq_5_o >= 20) %>%
  ungroup()

## check numbers (one printed row per district that still has urban rows)
Ana_n_ru %>%
  dplyr::filter(urban == 1) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
#337

## same check for the rural rows
Ana_n_ru %>%
  dplyr::filter(urban == 0) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
#438

# Mean of asset_index_combined_s per district x urban/rural stratum within
# wealth quintile 1 and within wealth quintile 5, then their difference.
hwq1_df <- Ana_n_ru %>%
  dplyr::filter(wealth_quintile_rurb == 1) %>%
  group_by(ex_d_name_ind, urban) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))

hwq5_df <- Ana_n_ru %>%
  dplyr::filter(wealth_quintile_rurb == 5) %>%
  group_by(ex_d_name_ind, urban) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

analysis_wq_national_r_u <- Ana_n_ru %>%
  left_join(hwq1_df, by = c("ex_d_name_ind", "urban")) %>%
  left_join(hwq5_df, by = c("ex_d_name_ind", "urban")) %>%
  mutate(diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts)

# One observation per district x urban/rural stratum: keep the first row.
analysis_wq_national_r_u <- analysis_wq_national_r_u %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1, urban, urban_lab) %>%
  group_by(ex_d_name_ind, urban) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup()

# Linear models of the quintile-5-minus-quintile-1 difference on ed_att_new,
# fitted separately on the urban == 1 and urban == 0 strata.
urban <- analysis_wq_national_r_u %>% dplyr::filter(urban == 1)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = urban)
summary(mod)

rural <- analysis_wq_national_r_u %>% dplyr::filter(urban == 0)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = rural)
summary(mod)


# Graph: ed_att_new against the quintile-5-minus-quintile-1 difference,
# jittered points plus a least-squares line (no confidence band), one facet
# per value of urban_lab.
labels <- c(urban = "Urban", rural = "Rural")

fig <- ggplot(
  analysis_wq_national_r_u,
  aes(x = ed_att_new, y = diff_hwq5_hwq1)
) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  facet_wrap(~urban_lab, labeller = labeller(urban_lab = labels)) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.text.x = element_text(size = 20, face = "bold"),
    strip.text.y = element_text(size = 20, face = "bold"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 32 / 1, expand = FALSE)

print(fig)






```

















# District level regressions using wealth quintiles computed for each district: top vs bottom
###### Linear and logistic regressions
```{r filter districts with no contrasts}


# Working copy for the wealth-quintile regressions: keep only the columns
# used below and the rows whose urban_prop and rural_prop are both >= 5.
Ana1 <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_broad_ind, ex_htn_broad_ind, bmigrt27.5, csmoke,
    ex_diab_broad_ind_dbl, ex_htn_broad_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, urban_lab,
    hh_wealth_quintile_district,
    urban_prop, rural_prop,
    PCI_districts, literacy_rate_female, ed_att_new, medianai_r_u
  ) %>%
  dplyr::filter(urban_prop >= 5, rural_prop >= 5)

 
#drop districts with <50 cases in low or high SES category


##test
test <- Ana1
# rows per district (the printed tibble has one row per district)
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
#608 districts

# district x quintile cells with fewer than 50 rows ...
test <- Ana1 %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_district) %>%
  summarize(sum = n()) %>%
  dplyr::filter(sum < 50)

# ... restricted to the bottom (1) and top (5) quintile
test %>%
  dplyr::filter(hh_wealth_quintile_district %in% c(1, 5)) %>%
  summarize(sum = n())
#0 districts



# Flag quintile-1 / quintile-5 rows (NA where the quintile is missing),
# count the flags per district, and keep districts with >= 50 of each.
Ana1 <- Ana1 %>%
  mutate(
    wq_1_o = ifelse(
      is.na(hh_wealth_quintile_district), NA,
      ifelse(hh_wealth_quintile_district == 1, 1, 0)
    ),
    wq_5_o = ifelse(
      is.na(hh_wealth_quintile_district), NA,
      ifelse(hh_wealth_quintile_district == 5, 1, 0)
    )
  ) %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  ) %>%
  ##filter districts >=50 cases
  dplyr::filter(count_wq_1_o >= 50, count_wq_5_o >= 50) %>%
  ungroup()

##check numbers (one printed row per remaining district)
Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
#608 (608-0)



#drop districts with too few cases in lower & higher SES category/CVD risk factor/district


##test if code is working
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
#608 districts

# Number of cases of each risk factor per district x wealth quintile.
test <- test %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_district) %>%
  summarize(
    diabetes_cases = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

# For each risk factor: cells with <10 cases, listed for quintile 5 and 1.
test_d <- test %>% dplyr::filter(diabetes_cases < 10)
test_d %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  summarize(sum = n())
#362 districts
test_d %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  summarize(sum = n())
#522 districts

test_h <- test %>% dplyr::filter(hypertension_cases < 10)
test_h %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  summarize(sum = n())
#2 districts
test_h %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  summarize(sum = n())
#5 districts

test_o <- test %>% dplyr::filter(obesity_cases < 10)
test_o %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  summarize(sum = n())
#21 districts
test_o %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  summarize(sum = n())
#359 districts

test_c <- test %>% dplyr::filter(csmoke_cases < 10)
test_c %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  summarize(sum = n())
#423 districts
test_c %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  summarize(sum = n())
#206 districts






##count district cases of diabetes/hypertension/obesity/csmoke in higher SES category
# (rows with hh_wealth_quintile_district == 5, summed per district)
case_list <- Ana1 %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  group_by(ex_d_name_ind) %>%
  summarize(
    diabetes_cases_5 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_5 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_5 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_5 = sum(csmoke_dbl, na.rm = TRUE)
  )


##merge by districts: one analysis table per risk factor, each carrying
##that risk factor's quintile-5 case count

##diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_5)
diabetes_analysis <- Ana1 %>% left_join(diabetes_list, by = "ex_d_name_ind")

##hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_5)
hypertension_analysis <- Ana1 %>% left_join(hypertension_list, by = "ex_d_name_ind")

##obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_5)
obesity_analysis <- Ana1 %>% left_join(obesity_list, by = "ex_d_name_ind")

##csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_5)
csmoke_analysis <- Ana1 %>% left_join(csmoke_list, by = "ex_d_name_ind")

#check numbers test (one printed row per district with <10 quintile-5 cases)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_5 < 10) %>%
  summarize(sum = n()) #362 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_5 < 10) %>%
  summarize(sum = n()) #2 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_5 < 10) %>%
  summarize(sum = n()) #21 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_5 < 10) %>%
  summarize(sum = n())
#423 districts


##count district cases of diabetes/hypertension/obesity/csmoke in lower SES category
# (rows with hh_wealth_quintile_district == 1, summed per district)
case_list <- Ana1 %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  group_by(ex_d_name_ind) %>%
  summarize(
    diabetes_cases_1 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_1 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_1 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_1 = sum(csmoke_dbl, na.rm = TRUE)
  )


##merge by districts: add the quintile-1 case count to each analysis table

##diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_1)
diabetes_analysis <- diabetes_analysis %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

##hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_1)
hypertension_analysis <- hypertension_analysis %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

##obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_1)
obesity_analysis <- obesity_analysis %>%
  left_join(obesity_list, by = "ex_d_name_ind")

##csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_1)
csmoke_analysis <- csmoke_analysis %>%
  left_join(csmoke_list, by = "ex_d_name_ind")


#check numbers test (one printed row per district with <10 quintile-1 cases)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_1 < 10) %>%
  summarize(sum = n()) #522 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_1 < 10) %>%
  summarize(sum = n()) #5 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_1 < 10) %>%
  summarize(sum = n()) #359 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_1 < 10) %>%
  summarize(sum = n())
#206 districts


#calculate sum of cases in higher and lower SES category for each CVD risk factor & exclude districts with <20 sum of cases (in higher and lower SES category)/district for the individual CVD risk factor
# sum_cases = quintile-1 cases + quintile-5 cases; rows whose sum is NA
# (a count missing after the joins) are dropped by the filter as well.

##diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_1 + diabetes_cases_5) %>%
  dplyr::filter(sum_cases >= 20)


##hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_1 + hypertension_cases_5) %>%
  dplyr::filter(sum_cases >= 20)


##obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_1 + obesity_cases_5) %>%
  dplyr::filter(sum_cases >= 20)

##csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_1 + csmoke_cases_5) %>%
  dplyr::filter(sum_cases >= 20)



## check number of districts that have not been dropped
## (each call prints one row per remaining district)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #142-> 608-142=466 rows removed from graphs
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #606->608-606=2 rows removed from graphs
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #528-> 608-528=80 rows removed from graphs
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #314-> 608-314=294 rows removed from graphs







```


```{r Regression analysis district level glm}


# Per-district logistic regressions (arm::bayesglm) of each risk factor on
# age_grp, urban, sex and hh_wealth_quintile_district. For every risk factor
# the estimate of the term "hh_wealth_quintile_district5" is merged onto Ana1
# as <outcome>_f_coeff_log.
# NOTE(review): that term name only exists if hh_wealth_quintile_district is
# a factor/character with a level "5" - confirm upstream.

# Helper: expand the table of per-district models returned by
# do(mod = ...) into one row per district x model term.
# `analysis %>% tidy(mod)` relied on broom's tidier for rowwise data frames,
# which broom deprecated and later removed; mapping broom::tidy over the
# model list-column and unnesting works with old and current versions.
#   models: table with ex_d_name_ind and a list-column `mod` of fitted models
#   returns: ungrouped tibble with ex_d_name_ind plus broom::tidy() columns
tidy_district_models <- function(models) {
  models %>%
    ungroup() %>%
    mutate(coeffs = purrr::map(mod, broom::tidy)) %>%
    dplyr::select(-mod) %>%
    tidyr::unnest(coeffs)
}


#####diabetes

#logistic regression grouped by district
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_broad_ind ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"),
    data = .
  ))

#extract coefficients with broom
coeffs_diab_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(diab_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####hypertension

#logistic regression grouped by district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_broad_ind ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"),
    data = .
  ))

#extract coefficients with broom
coeffs_htn_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(htn_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####obesity

#logistic regression grouped by district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"),
    data = .
  ))

#extract coefficients with broom
coeffs_bmi_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(bmi_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


#####csmoke

#logistic regression grouped by district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"),
    data = .
  ))

#extract coefficients with broom
coeffs_csmoke_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(csmoke_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


```

```{r Regression analysis district level lm}


# Per-district linear regressions (lm) of each numeric risk-factor indicator
# (*_dbl) on age_grp, urban, sex and hh_wealth_quintile_district. For every
# risk factor the estimate of the term "hh_wealth_quintile_district5" is
# merged onto Ana1 as <outcome>_f_coeff.
# NOTE(review): that term name only exists if hh_wealth_quintile_district is
# a factor/character with a level "5" - confirm upstream.

# Helper: expand the table of per-district models returned by
# do(mod = ...) into one row per district x model term.
# `analysis %>% tidy(mod)` relied on broom's tidier for rowwise data frames,
# which broom deprecated and later removed; mapping broom::tidy over the
# model list-column and unnesting works with old and current versions.
#   models: table with ex_d_name_ind and a list-column `mod` of fitted models
#   returns: ungrouped tibble with ex_d_name_ind plus broom::tidy() columns
tidy_district_models <- function(models) {
  models %>%
    ungroup() %>%
    mutate(coeffs = purrr::map(mod, broom::tidy)) %>%
    dplyr::select(-mod) %>%
    tidyr::unnest(coeffs)
}


#####diabetes

#linear regression grouped by district
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_broad_ind_dbl ~ age_grp + sex + urban + hh_wealth_quintile_district,
    data = .
  ))

#extract coefficients with broom
coeffs_diab_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(diab_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




#####hypertension

#linear regression grouped by district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_broad_ind_dbl ~ age_grp + urban + sex + hh_wealth_quintile_district,
    data = .
  ))

#extract coefficients with broom
coeffs_htn_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(htn_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


######obesity

#linear regression grouped by district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + hh_wealth_quintile_district,
    data = .
  ))

#extract coefficients with broom
coeffs_bmi_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(bmi_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



######csmoke

#linear regression grouped by district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + hh_wealth_quintile_district,
    data = .
  ))

#extract coefficients with broom
coeffs_csmoke_f <- tidy_district_models(analysis)

#filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(csmoke_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




```









```{r graphs: filter: one value/district }

# Ana1_DHS_w is the copy of Ana1 that carries the per-district wealth
# coefficients. Its assignment used to be commented out, so this chunk failed
# with "object 'Ana1_DHS_w' not found" in a fresh session. Create it from
# Ana1 only when it does not exist yet, so an already-saved copy is not
# overwritten.
# NOTE(review): assumes this chunk runs right after the wealth-quintile
# regression chunks, i.e. before Ana1 is rebuilt for another analysis.
if (!exists("Ana1_DHS_w")) {
  Ana1_DHS_w <- Ana1
}

#only 1 value/district (for rural/urban 2)
# ex_d_name_ind is selected explicitly; select() on grouped data would
# otherwise add it back as the first column with a message.
Ana2 <- Ana1_DHS_w %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    ex_d_name_ind,
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  ungroup()

# First row of every district x urban_lab combination.
Ana2_urban_rural <- Ana1_DHS_w %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::select(
    ex_d_name_ind,
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup()



```
###### Graphs
```{r graphs:educational attainment}

# The eight figures in this chunk used the same plot specification copied
# eight times; only the plotted column, the y-axis title/scale and the aspect
# ratio differed. The shared specification now lives in one helper with two
# thin wrappers. The lm() calls stay at top level so the printed model
# summaries show the same call as before.

# Scatter (jittered) of a per-district coefficient against ed_att_new with a
# least-squares line and no confidence band.
#   data:       data frame with ed_att_new and the column named by `coeff`
#   coeff:      name (string) of the coefficient column to plot
#   y_title:    y-axis title
#   y_scale:    a ggplot2 y scale (breaks/limits/labels)
#   ratio:      aspect ratio passed to coord_fixed()
#   multiplier: factor applied to the coefficient before plotting
# Returns the ggplot object (not printed).
plot_coeff_by_ed_att <- function(data, coeff, y_title, y_scale, ratio,
                                 multiplier = 1) {
  plot_data <- data.frame(
    ed_att_new = data[["ed_att_new"]],
    coeff_value = data[[coeff]] * multiplier
  )

  ggplot(plot_data, aes(x = ed_att_new, y = coeff_value)) +
    geom_jitter(size = 0.3) +
    geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(
      x = "% of participants in a district who completed primary education",
      y = y_title,
      fill = ""
    ) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = ratio, expand = FALSE)
}

# Absolute difference: linear-model coefficient x 100, in percentage points.
plot_abs_diff <- function(data, coeff) {
  plot_coeff_by_ed_att(
    data, coeff,
    y_title = "Difference in percentage points",
    y_scale = scale_y_continuous(
      breaks = c((-20), (-10), 0, 10, 20),
      limits = c((-33), 33)
    ),
    ratio = 12 / 10,
    multiplier = 100
  )
}

# Odds ratio: the logistic coefficient is plotted on its own (log-odds) scale;
# the breaks sit at log(0.05), log(0.2), 0, log(5), log(20) and are labelled
# with the corresponding odds ratios.
plot_odds_ratio <- function(data, coeff) {
  plot_coeff_by_ed_att(
    data, coeff,
    y_title = "Odds Ratio",
    y_scale = scale_y_continuous(
      breaks = c((-2.996), (-1.609), 0, 1.609, 2.996),
      limits = c((-5.3), 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
    ),
    ratio = 15 / 2
  )
}


##Diabetes

#absolute difference
fig <- plot_abs_diff(Ana2, "diab_f_coeff")
print(fig)

mod <- lm((diab_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_odds_ratio(Ana2, "diab_f_coeff_log")
print(fig)

mod <- lm((diab_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


##hypertension

#absolute difference
fig <- plot_abs_diff(Ana2, "htn_f_coeff")
print(fig)

mod <- lm((htn_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_odds_ratio(Ana2, "htn_f_coeff_log")
print(fig)

mod <- lm((htn_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


##bmi

#absolute difference
fig <- plot_abs_diff(Ana2, "bmi_f_coeff")
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_odds_ratio(Ana2, "bmi_f_coeff_log")
print(fig)

mod <- lm((bmi_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


##csmoke

#absolute difference
fig <- plot_abs_diff(Ana2, "csmoke_f_coeff")
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_odds_ratio(Ana2, "csmoke_f_coeff_log")
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


```















# District level regressions using educational attainment (bottom category vs top two categories)
###### Linear and logistic regressions

```{r Regression analysis filter districts with no contrasts}


# Working copy for the educational-attainment regressions: keep only the
# columns used below and the rows whose urban_prop and rural_prop are both
# >= 5.
Ana1 <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_broad_ind, ex_htn_broad_ind, bmigrt27.5, csmoke,
    ex_diab_broad_ind_dbl, ex_htn_broad_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, urban_lab,
    ed_4,
    urban_prop, rural_prop,
    PCI_districts, literacy_rate_female, ed_att_new, medianai_r_u,
    ed_5
  ) %>%
  dplyr::filter(urban_prop >= 5, rural_prop >= 5)



#drop districts with <50 cases in low or high SES category


##test
test <- Ana1
# rows per district (the printed tibble has one row per district)
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
#608 districts

# district x ed_4 cells with fewer than 50 rows ...
test <- Ana1 %>%
  group_by(ex_d_name_ind, ed_4) %>%
  summarize(sum = n()) %>%
  dplyr::filter(sum < 50)

# ... restricted to ed_4 values 1 and 5
test %>%
  dplyr::filter(ed_4 %in% c(1, 5)) %>%
  summarize(sum = n())
#13 districts



# Flag rows with ed_4 == 1 / ed_4 == 5 (NA where ed_4 is missing), count the
# flags per district, and keep districts with >= 50 of each.
Ana1 <- Ana1 %>%
  mutate(
    ed_1_o = ifelse(is.na(ed_4), NA, ifelse(ed_4 == 1, 1, 0)),
    ed_5_o = ifelse(is.na(ed_4), NA, ifelse(ed_4 == 5, 1, 0))
  ) %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_ed_1_o = sum(ed_1_o, na.rm = TRUE),
    count_ed_5_o = sum(ed_5_o, na.rm = TRUE)
  ) %>%
  ##filter districts >=50 cases
  dplyr::filter(count_ed_1_o >= 50, count_ed_5_o >= 50) %>%
  ungroup()

##check numbers (one printed row per remaining district)
Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
#595 (608-13)



#drop districts with too few cases in lower & higher SES category/CVD risk factor/district


##test if code is working
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
#595 districts

# Number of cases of each risk factor per district x ed_4 category.
test <- test %>%
  group_by(ex_d_name_ind, ed_4) %>%
  summarize(
    diabetes_cases = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

# For each risk factor: cells with <10 cases, listed for ed_4 == 5 and == 1.
test_d <- test %>% dplyr::filter(diabetes_cases < 10)
test_d %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
#495 districts
test_d %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
#259

test_h <- test %>% dplyr::filter(hypertension_cases < 10)
test_h %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
#29 districts
test_h %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
#0 districts

test_o <- test %>% dplyr::filter(obesity_cases < 10)
test_o %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
#102 districts
test_o %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
#68 districts

test_c <- test %>% dplyr::filter(csmoke_cases < 10)
test_c %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
#403 districts
test_c %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
#127 districts






##count district cases of diabetes/hypertension/obesity/csmoke in higher SES category
# (rows with ed_4 == 5, summed per district)
case_list <- Ana1 %>%
  dplyr::filter(ed_4 == 5) %>%
  group_by(ex_d_name_ind) %>%
  summarize(
    diabetes_cases_5 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_5 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_5 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_5 = sum(csmoke_dbl, na.rm = TRUE)
  )


##merge by districts: one analysis table per risk factor, each carrying
##that risk factor's ed_4 == 5 case count

##diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_5)
diabetes_analysis <- Ana1 %>% left_join(diabetes_list, by = "ex_d_name_ind")

##hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_5)
hypertension_analysis <- Ana1 %>% left_join(hypertension_list, by = "ex_d_name_ind")

##obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_5)
obesity_analysis <- Ana1 %>% left_join(obesity_list, by = "ex_d_name_ind")

##csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_5)
csmoke_analysis <- Ana1 %>% left_join(csmoke_list, by = "ex_d_name_ind")

#check numbers test (one printed row per district with <10 such cases)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_5 < 10) %>%
  summarize(sum = n()) #495 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_5 < 10) %>%
  summarize(sum = n()) #29 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_5 < 10) %>%
  summarize(sum = n()) #102 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_5 < 10) %>%
  summarize(sum = n())
#403 districts


## Count district cases of diabetes/hypertension/obesity/csmoke in the lower
## SES category (rows with ed_4 == 1); missing indicator values are ignored.
## This overwrites the case_list built for the higher category.
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(
    diabetes_cases_1 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_1 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_1 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_1 = sum(csmoke_dbl, na.rm = TRUE)
  )


## Merge by districts: the lower-category counts are added to the per-outcome
## data sets, which already exist at this point.

## diabetes
diabetes_list <- case_list %>%
  dplyr::select(ex_d_name_ind, diabetes_cases_1)
diabetes_analysis <- diabetes_analysis %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>%
  dplyr::select(ex_d_name_ind, hypertension_cases_1)
hypertension_analysis <- hypertension_analysis %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>%
  dplyr::select(ex_d_name_ind, obesity_cases_1)
obesity_analysis <- obesity_analysis %>%
  left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>%
  dplyr::select(ex_d_name_ind, csmoke_cases_1)
csmoke_analysis <- csmoke_analysis %>%
  left_join(csmoke_list, by = "ex_d_name_ind")


# Check numbers against the test above: each call prints one row per district
# that has fewer than 10 cases in the lower category.
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_1 < 10) %>%
  summarize(sum = n()) #259 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_1 < 10) %>%
  summarize(sum = n()) #0 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_1 < 10) %>%
  summarize(sum = n()) #68 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_1 < 10) %>%
  summarize(sum = n())
#127 districts


# For each CVD risk factor, add up the district's cases in the higher and the
# lower SES category and keep only districts where that sum is at least 20.
# NOTE(review): when either count is NA, sum_cases is NA and filter() drops
# the row as well - confirm that this is the intended handling.

## diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_1 + diabetes_cases_5) %>%
  dplyr::filter(sum_cases >= 20)


## hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_1 + hypertension_cases_5) %>%
  dplyr::filter(sum_cases >= 20)


## obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_1 + obesity_cases_5) %>%
  dplyr::filter(sum_cases >= 20)

## csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_1 + csmoke_cases_5) %>%
  dplyr::filter(sum_cases >= 20)



## Check number of districts that have not been dropped.
## Each pipeline keeps one row per district and then ungroups before counting,
## so `n` is the number of remaining districts (while still grouped, n() would
## be evaluated per district and return a column of 1s).
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup() %>%
  summarize(n = n()) #200 districts->595-200=395 rows removed from graphs
#pci:595-440=155 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup() %>%
  summarize(n = n()) #595 districts->595-595=0 rows removed from graphs
#pci:595-145=450 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup() %>%
  summarize(n = n()) #531 districts->595-531=64 rows removed from graphs
#pci:595-188=407 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup() %>%
  summarize(n = n()) #390 districts->595-390=205 rows removed from graphs
#pci:595-292=303 districts







```



```{r Regression analysis district level glm}


# Turn the rowwise model table returned by `do(mod = ...)` into one long
# coefficient table with a row per district and model term.
#   models  data frame with the columns ex_d_name_ind and mod (list of fits)
# Returns the columns ex_d_name_ind plus everything broom::tidy() reports.
# The former `models %>% tidy(mod)` depended on broom's tidy() method for
# rowwise data frames, which current broom releases no longer provide, so
# every model is tidied explicitly here.
tidy_district_models <- function(models) {
  models %>%
    ungroup() %>%
    dplyr::mutate(coefs = purrr::map(mod, broom::tidy)) %>%
    dplyr::select(ex_d_name_ind, coefs) %>%
    tidyr::unnest(coefs)
}


##### diabetes

# logistic regression fitted separately within every district
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_broad_ind ~ age_grp + sex + urban + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# extract the coefficients of all district models
coeffs_diab_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" (ed_4 level 5, logit scale) and
# merge it into Ana1 as diab_f_coeff_log
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, diab_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


##### hypertension

# logistic regression fitted separately within every district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_broad_ind ~ age_grp + urban + sex + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# extract the coefficients of all district models
coeffs_htn_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" and merge it into Ana1 as
# htn_f_coeff_log
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, htn_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


###### obesity

# logistic regression fitted separately within every district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# extract the coefficients of all district models
coeffs_bmi_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" and merge it into Ana1 as
# bmi_f_coeff_log
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, bmi_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


###### csmoke

# logistic regression fitted separately within every district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# extract the coefficients of all district models
coeffs_csmoke_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" and merge it into Ana1 as
# csmoke_f_coeff_log
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, csmoke_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




```

```{r Regression analysis district level lm}


# Turn the rowwise model table returned by `do(mod = ...)` into one long
# coefficient table with a row per district and model term.
#   models  data frame with the columns ex_d_name_ind and mod (list of fits)
# Returns the columns ex_d_name_ind plus everything broom::tidy() reports.
# The former `models %>% tidy(mod)` depended on broom's tidy() method for
# rowwise data frames, which current broom releases no longer provide, so
# every model is tidied explicitly here.
tidy_district_models <- function(models) {
  models %>%
    ungroup() %>%
    dplyr::mutate(coefs = purrr::map(mod, broom::tidy)) %>%
    dplyr::select(ex_d_name_ind, coefs) %>%
    tidyr::unnest(coefs)
}


##### diabetes

# linear regression of the 0/1 outcome, fitted separately within every district
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_broad_ind_dbl ~ age_grp + sex + urban + ed_4,
    data = .
  ))

# extract the coefficients of all district models
coeffs_diab_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" (ed_4 level 5) and merge it into
# Ana1 as diab_f_coeff
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, diab_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


##### hypertension

# linear regression of the 0/1 outcome, fitted separately within every district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_broad_ind_dbl ~ age_grp + urban + sex + ed_4,
    data = .
  ))

# extract the coefficients of all district models
coeffs_htn_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" and merge it into Ana1 as
# htn_f_coeff
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, htn_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


###### obesity

# linear regression of the 0/1 outcome, fitted separately within every district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + ed_4,
    data = .
  ))

# extract the coefficients of all district models
coeffs_bmi_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" and merge it into Ana1 as
# bmi_f_coeff
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, bmi_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


###### csmoke

# linear regression of the 0/1 outcome, fitted separately within every district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + ed_4,
    data = .
  ))

# extract the coefficients of all district models
coeffs_csmoke_f <- tidy_district_models(analysis)

# keep the coefficient of the term "ed_45" and merge it into Ana1 as
# csmoke_f_coeff
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "ed_45")

coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)

coeffs_w5_only <- rename(coeffs_w5_only, csmoke_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




```






















```{r graphs: filter:one value/district  }

#Ana1_DHS_ed<-Ana1
# NOTE(review): the assignment above is commented out, so this chunk relies on
# Ana1_DHS_ed already existing - confirm where it is created before knitting.

## Only 1 value/district: take the first row of every district and keep the
## education coefficients together with the district-level covariates that are
## used on the x-axes of the graphs.
Ana2 <- Ana1_DHS_ed %>%
  group_by(ex_d_name_ind) %>%
  dplyr::slice(1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  ungroup()

## Same reduction with one row per district and urban/rural label. Each group
## holds exactly one row after the slice, so no second reduction is needed.
Ana2_urban_rural <- Ana1_DHS_ed %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::slice(1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  ungroup()



```

###### Graphs
```{r graphs:educational attainment}

# Build the scatter plot (jittered points plus least-squares trend line) of one
# per-district education coefficient against ed_att_new.
#   data        data frame holding `y_var` and ed_att_new (Ana2 below)
#   y_var       name of the coefficient column, given as a string
#   odds_ratio  FALSE: the coefficient is multiplied by 100 and drawn on the
#               percentage-point axis; TRUE: the coefficient is drawn as is and
#               the axis ticks are labelled with the matching odds ratios
#               (exp(1.609) = 5, exp(2.996) = 20)
# Returns the ggplot object without printing it.
# NOTE(review): limits set on a scale turn out-of-range values into NA before
# geom_smooth() fits its line, while the lm() calls below use every district;
# confirm the drawn line is meant to exclude districts outside the limits.
plot_ed_att_gradient <- function(data, y_var, odds_ratio = FALSE) {
  if (odds_ratio) {
    y_multiplier <- 1
    y_label <- "Odds Ratio"
    aspect <- 15 / 2
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c(
        "-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
        "1.609" = "5", "2.996" = "20"
      )
    )
  } else {
    y_multiplier <- 100
    y_label <- "Difference in percentage points"
    aspect <- 12 / 10
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
  }
  point_mapping <- aes(y = .data[[y_var]] * y_multiplier, x = ed_att_new)

  ggplot(data) +
    geom_jitter(mapping = point_mapping, size = 0.3) +
    geom_smooth(
      mapping = point_mapping, method = "lm", color = "gray48",
      se = FALSE, size = 0.7
    ) +
    theme_classic() +
    labs(
      x = "% of participants in a district who completed primary education",
      y = y_label,
      fill = ""
    ) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}

## Diabetes

# absolute difference
fig <- plot_ed_att_gradient(Ana2, "diab_f_coeff")
print(fig)

mod <- lm((diab_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_ed_att_gradient(Ana2, "diab_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((diab_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## hypertension

# absolute difference
fig <- plot_ed_att_gradient(Ana2, "htn_f_coeff")
print(fig)

mod <- lm((htn_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_ed_att_gradient(Ana2, "htn_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((htn_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## bmi

# absolute difference
fig <- plot_ed_att_gradient(Ana2, "bmi_f_coeff")
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_ed_att_gradient(Ana2, "bmi_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## csmoke

# absolute difference
fig <- plot_ed_att_gradient(Ana2, "csmoke_f_coeff")
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_ed_att_gradient(Ana2, "csmoke_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


```

```{r graphs:PCI_districts }


# Build the scatter plot (jittered points plus least-squares trend line) of one
# per-district education coefficient against PCI_districts.
#   data        data frame holding `y_var` and PCI_districts (Ana2 below)
#   y_var       name of the coefficient column, given as a string
#   odds_ratio  FALSE: the coefficient is multiplied by 100 and drawn on the
#               percentage-point axis; TRUE: the coefficient is drawn as is and
#               the axis ticks are labelled with the matching odds ratios
#               (exp(1.609) = 5, exp(2.996) = 20)
# Returns the ggplot object without printing it.
# NOTE(review): limits set on a scale turn out-of-range values into NA before
# geom_smooth() fits its line, while the lm() calls below use every district;
# confirm the drawn line is meant to exclude districts outside the limits
# (e.g. PCI_districts above 80000).
plot_pci_gradient <- function(data, y_var, odds_ratio = FALSE) {
  if (odds_ratio) {
    y_multiplier <- 1
    y_label <- "Odds Ratio"
    aspect <- 1 / 0.00015
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c(
        "-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
        "1.609" = "5", "2.996" = "20"
      )
    )
  } else {
    y_multiplier <- 100
    y_label <- "Difference in percentage points"
    aspect <- 1 / 0.0010
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
  }
  point_mapping <- aes(y = .data[[y_var]] * y_multiplier, x = PCI_districts)

  ggplot(data) +
    geom_jitter(mapping = point_mapping, size = 0.3) +
    geom_smooth(
      mapping = point_mapping, method = "lm", color = "gray48",
      se = FALSE, size = 0.7
    ) +
    theme_classic() +
    labs(x = "GDP/capita (Rs)", y = y_label, fill = "") +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(
      breaks = c(20000, 40000, 60000),
      limits = c(0, 80000),
      labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
    ) +
    coord_fixed(ratio = aspect, expand = FALSE)
}

## Diabetes

# absolute difference
fig <- plot_pci_gradient(Ana2, "diab_f_coeff")
print(fig)

mod <- lm((diab_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_pci_gradient(Ana2, "diab_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((diab_f_coeff_log) ~ PCI_districts, data = Ana2)
summary(mod)


## hypertension

# absolute difference
fig <- plot_pci_gradient(Ana2, "htn_f_coeff")
print(fig)

mod <- lm((htn_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_pci_gradient(Ana2, "htn_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((htn_f_coeff_log) ~ PCI_districts, data = Ana2)
summary(mod)


## bmi

# absolute difference
fig <- plot_pci_gradient(Ana2, "bmi_f_coeff")
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_pci_gradient(Ana2, "bmi_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ PCI_districts, data = Ana2)
summary(mod)


## csmoke

# absolute difference
fig <- plot_pci_gradient(Ana2, "csmoke_f_coeff")
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_pci_gradient(Ana2, "csmoke_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ PCI_districts, data = Ana2)
summary(mod)



```

```{r graphs:Female literacy rate }


##Diabetes

#absolute difference

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=diab_f_coeff*100, x=literacy_rate_female),size=0.3)+
geom_smooth(mapping=aes(y=diab_f_coeff*100, x=literacy_rate_female),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "Female literacy rate",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
   scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(15,110))+coord_fixed(ratio=12/10, expand=F)
print(fig)

mod<-lm((diab_f_coeff*100) ~literacy_rate_female,data=Ana2) 
summary(mod)
 
#odds ratio

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=diab_f_coeff_log, x=literacy_rate_female),size=0.3)+
geom_smooth(mapping=aes(y=diab_f_coeff_log, x=literacy_rate_female),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "Female literacy rate",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
  scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(15,110))+coord_fixed(ratio=15/2, expand=F)
print(fig)

mod<-lm((diab_f_coeff_log) ~literacy_rate_female,data=Ana2) 
summary (mod)



##Hypertension

#absolute difference

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=htn_f_coeff*100, x=literacy_rate_female),size=0.3)+
geom_smooth(mapping=aes(y=htn_f_coeff*100, x=literacy_rate_female),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "Female literacy rate",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
   scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(15,110))+coord_fixed(ratio=12/10, expand=F)
print(fig)

mod<-lm((htn_f_coeff*100) ~literacy_rate_female,data=Ana2) 
summary(mod)

# Helper for the female-literacy figures below: jittered points plus a linear
# fit (no confidence band), with the shared theme, axis scales and a fixed
# aspect ratio.
#   data    data frame holding the columns referenced in `mapping`
#   mapping aes() giving the y (coefficient) and x (literacy rate) aesthetics;
#           the same mapping is used for the points and for the fitted line
#   y_axis  "difference" -> y axis in percentage points;
#           "odds_ratio" -> y axis on the log scale whose ticks are labelled
#           as odds ratios (breaks are log(0.05), log(0.2), 0, log(5), log(20))
# Returns the ggplot object (not printed).
plot_literacy_gradient <- function(data, mapping,
                                   y_axis = c("difference", "odds_ratio")) {
  y_axis <- match.arg(y_axis)

  if (y_axis == "difference") {
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 12 / 10
  } else {
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 15 / 2
  }

  ggplot(data) +
    geom_jitter(mapping = mapping, size = 0.3) +
    geom_smooth(mapping = mapping, method = "lm", color = "gray48",
                se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "Female literacy rate", y = y_title, fill = "") +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold",
                                  margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold",
                                  margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}

# The lm() calls are kept at top level (not wrapped in the helper) so that the
# "Call:" line printed by summary() still shows the actual formula.

# odds ratio (hypertension)

fig <- plot_literacy_gradient(
  Ana2, aes(y = htn_f_coeff_log, x = literacy_rate_female), "odds_ratio"
)
print(fig)

mod <- lm((htn_f_coeff_log) ~ literacy_rate_female, data = Ana2)
summary(mod)



## bmi

# absolute difference

fig <- plot_literacy_gradient(
  Ana2, aes(y = bmi_f_coeff * 100, x = literacy_rate_female), "difference"
)
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ literacy_rate_female, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_literacy_gradient(
  Ana2, aes(y = bmi_f_coeff_log, x = literacy_rate_female), "odds_ratio"
)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ literacy_rate_female, data = Ana2)
summary(mod)


## csmoke

# absolute difference

fig <- plot_literacy_gradient(
  Ana2, aes(y = csmoke_f_coeff * 100, x = literacy_rate_female), "difference"
)
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ literacy_rate_female, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_literacy_gradient(
  Ana2, aes(y = csmoke_f_coeff_log, x = literacy_rate_female), "odds_ratio"
)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ literacy_rate_female, data = Ana2)
summary(mod)


```



```{r graphs: Median household wealth }


# Split the data by the `urban` flag; the subsets feed the separate
# urban/rural regressions below (the figures use the full data, faceted).
urban <- dplyr::filter(Ana2_urban_rural, urban == 1)
rural <- dplyr::filter(Ana2_urban_rural, urban == 0)

# Facet strip labels, keyed by the values of `urban_lab`
labels <- c(urban = "Urban", rural = "Rural")

# Helper for the median-household-wealth figures below: jittered points plus a
# linear fit (no confidence band), faceted by `urban_lab`, with the shared
# theme, axis scales and a fixed aspect ratio.
#   data         data frame holding `urban_lab` and the columns in `mapping`
#   mapping      aes() giving the y (coefficient) and x (wealth) aesthetics;
#                used for both the points and the fitted line
#   y_axis       "difference" -> y axis in percentage points;
#                "odds_ratio" -> y axis on the log scale whose ticks are
#                labelled as odds ratios (breaks are log(0.05), log(0.2), 0,
#                log(5), log(20))
#   facet_labels named character vector passed to labeller() for `urban_lab`
# Returns the ggplot object (not printed).
plot_wealth_gradient <- function(data, mapping,
                                 y_axis = c("difference", "odds_ratio"),
                                 facet_labels) {
  y_axis <- match.arg(y_axis)

  if (y_axis == "difference") {
    # NOTE(review): the x-axis title is capitalised differently in the two
    # figure types; both strings are kept exactly as they were.
    x_title <- "Median Household wealth"
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 1 / 8
  } else {
    x_title <- "Median household wealth"
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 10 / 12
  }

  ggplot(data) +
    geom_jitter(mapping = mapping, size = 0.3) +
    geom_smooth(mapping = mapping, method = "lm", color = "gray48",
                se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = x_title, y = y_title, fill = "") +
    facet_wrap(~urban_lab, labeller = labeller(urban_lab = facet_labels)) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold",
                                  margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold",
                                  margin = margin(r = 20)),
      panel.spacing = unit(2, "lines"),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.text.x = element_text(size = 20, face = "bold"),
      strip.text.y = element_text(size = 20, face = "bold"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(-1.5, 0, 1.5), limits = c(-2.5, 2.5)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}

# The lm() calls are kept at top level (not wrapped in the helper) so that the
# "Call:" line printed by summary() still shows the actual formula and data.

## Diabetes

# absolute difference

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = diab_f_coeff * 100, x = medianai_r_u),
  "difference", labels
)
print(fig)

mod_u <- lm((diab_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((diab_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = diab_f_coeff_log, x = medianai_r_u),
  "odds_ratio", labels
)
print(fig)

mod_u <- lm((diab_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((diab_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)




## Hypertension

# absolute difference

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = htn_f_coeff * 100, x = medianai_r_u),
  "difference", labels
)
print(fig)

mod_u <- lm((htn_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((htn_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = htn_f_coeff_log, x = medianai_r_u),
  "odds_ratio", labels
)
print(fig)

mod_u <- lm((htn_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((htn_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


## bmi

# absolute difference

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = bmi_f_coeff * 100, x = medianai_r_u),
  "difference", labels
)
print(fig)

mod_u <- lm((bmi_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((bmi_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = bmi_f_coeff_log, x = medianai_r_u),
  "odds_ratio", labels
)
print(fig)

mod_u <- lm((bmi_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((bmi_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)



## csmoke

# absolute difference

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = csmoke_f_coeff * 100, x = medianai_r_u),
  "difference", labels
)
print(fig)

mod_u <- lm((csmoke_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

# Rural model: stored in mod_r (it was previously assigned to mod_u, which
# overwrote the urban model fitted just above).
mod_r <- lm((csmoke_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_wealth_gradient(
  Ana2_urban_rural, aes(y = csmoke_f_coeff_log, x = medianai_r_u),
  "odds_ratio", labels
)
print(fig)

mod_u <- lm((csmoke_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((csmoke_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


```








```{r graphs: urban prop}


# Helper for the urban-proportion figures below: jittered points plus a linear
# fit (no confidence band), with the shared theme, axis scales and a fixed
# aspect ratio.
#   data    data frame holding the columns referenced in `mapping`
#   mapping aes() giving the y (coefficient) and x (urban_prop) aesthetics;
#           used for both the points and the fitted line
#   y_axis  "difference" -> y axis in percentage points;
#           "odds_ratio" -> y axis on the log scale whose ticks are labelled
#           as odds ratios (breaks are log(0.05), log(0.2), 0, log(5), log(20))
# Returns the ggplot object (not printed).
plot_urban_prop_gradient <- function(data, mapping,
                                     y_axis = c("difference", "odds_ratio")) {
  y_axis <- match.arg(y_axis)

  if (y_axis == "difference") {
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 12 / 7.5
  } else {
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 12 / 1.14
  }

  ggplot(data) +
    geom_jitter(mapping = mapping, size = 0.3) +
    geom_smooth(mapping = mapping, method = "lm", color = "gray48",
                se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "% of participants who live in an urban area", y = y_title,
         fill = "") +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold",
                                  margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold",
                                  margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(0, 125)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}

# The lm() calls are kept at top level (not wrapped in the helper) so that the
# "Call:" line printed by summary() still shows the actual formula.

## Diabetes

# absolute difference

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = diab_f_coeff * 100, x = urban_prop), "difference"
)
print(fig)

mod <- lm((diab_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = diab_f_coeff_log, x = urban_prop), "odds_ratio"
)
print(fig)

mod <- lm((diab_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)




## Hypertension

# absolute difference

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = htn_f_coeff * 100, x = urban_prop), "difference"
)
print(fig)

mod <- lm((htn_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = htn_f_coeff_log, x = urban_prop), "odds_ratio"
)
print(fig)

mod <- lm((htn_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)




## bmi

# absolute difference

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = bmi_f_coeff * 100, x = urban_prop), "difference"
)
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = bmi_f_coeff_log, x = urban_prop), "odds_ratio"
)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)




## csmoke

# absolute difference

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = csmoke_f_coeff * 100, x = urban_prop), "difference"
)
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_urban_prop_gradient(
  Ana2, aes(y = csmoke_f_coeff_log, x = urban_prop), "odds_ratio"
)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)

```









# District level regressions using wealth quintiles computed for each district: bottom two vs top two quintiles
##### Linear and logistic regressions
```{r filter districts with no contrasts}


# Analysis dataset: keep only the columns used in the district-level
# regressions, then keep rows where both urban_prop and rural_prop are >= 5.
Ana1 <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_broad_ind, ex_htn_broad_ind, bmigrt27.5, csmoke,
    ex_diab_broad_ind_dbl, ex_htn_broad_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, urban_lab,
    hh_wealth_quintile_groups_district,
    urban_prop, rural_prop, PCI_districts, literacy_rate_female,
    ed_att_new, medianai_r_u
  ) %>%
  dplyr::filter(urban_prop >= 5 & rural_prop >= 5)


 
# drop districts with <50 cases in low or high SES category


## test
# Number of rows per district (one output row per district)
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 608 districts

# District x wealth-group cells that contain fewer than 50 rows
test <- Ana1 %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_groups_district) %>%
  summarize(sum = n()) %>%
  dplyr::filter(sum < 50)

# ...restricted to wealth groups 0 and 2
test %>%
  dplyr::filter(
    hh_wealth_quintile_groups_district == 0 |
      hh_wealth_quintile_groups_district == 2
  ) %>%
  summarize(sum = n())
# 0 districts



# Flag rows in wealth group 0 (wq_0_o) and wealth group 2 (wq_2_o) as 1/0
# (NA when the wealth group is missing), count the flagged rows per district,
# and keep only districts with at least 50 rows in each of the two groups.
Ana1 <- Ana1 %>%
  mutate(
    wq_0_o = ifelse(
      is.na(hh_wealth_quintile_groups_district),
      NA,
      ifelse(hh_wealth_quintile_groups_district == 0, 1, 0)
    ),
    wq_2_o = ifelse(
      is.na(hh_wealth_quintile_groups_district),
      NA,
      ifelse(hh_wealth_quintile_groups_district == 2, 1, 0)
    )
  ) %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_0_o = sum(wq_0_o, na.rm = TRUE),
    count_wq_2_o = sum(wq_2_o, na.rm = TRUE)
  ) %>%
  ## filter districts >=50 cases
  dplyr::filter(count_wq_0_o >= 50 & count_wq_2_o >= 50) %>%
  ungroup()

## check numbers (one output row per remaining district)
Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
# 608 (608-0)



# drop districts with too few cases in lower & higher SES category/CVD risk factor/district


## test if code is working
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 608 districts

# Sum of each *_dbl outcome column per district x wealth group
test <- test %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_groups_district) %>%
  summarize(
    diabetes_cases = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

# Cells with fewer than 10 diabetes cases, by wealth group
test_d <- test %>% dplyr::filter(diabetes_cases < 10)
test_d %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarize(sum = n())
# 149 districts
test_d %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarize(sum = n())
# 330 districts

# Cells with fewer than 10 hypertension cases, by wealth group
test_h <- test %>% dplyr::filter(hypertension_cases < 10)
test_h %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarize(sum = n())
# 0 districts
test_h %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarize(sum = n())
# 0 districts

# Cells with fewer than 10 obesity cases, by wealth group
test_o <- test %>% dplyr::filter(obesity_cases < 10)
test_o %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarize(sum = n())
# 7 districts
test_o %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarize(sum = n())
# 139 districts

# Cells with fewer than 10 csmoke cases, by wealth group
test_c <- test %>% dplyr::filter(csmoke_cases < 10)
test_c %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarize(sum = n())
# 171 districts
test_c %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarize(sum = n())
# 63 districts






## count district cases of diabetes/hypertension/obesity/csmoke in higher SES category
# (rows with hh_wealth_quintile_groups_district == 2; columns get suffix _2)
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarize(
    diabetes_cases_2 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_2 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_2 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_2 = sum(csmoke_dbl, na.rm = TRUE)
  )



## merge by districts: one copy of Ana1 per outcome, each carrying that
## outcome's district-level count

## diabetes
diabetes_list <- case_list %>%
  dplyr::select(ex_d_name_ind, diabetes_cases_2)
diabetes_analysis <- Ana1 %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>%
  dplyr::select(ex_d_name_ind, hypertension_cases_2)
hypertension_analysis <- Ana1 %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>%
  dplyr::select(ex_d_name_ind, obesity_cases_2)
obesity_analysis <- Ana1 %>%
  left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>%
  dplyr::select(ex_d_name_ind, csmoke_cases_2)
csmoke_analysis <- Ana1 %>%
  left_join(csmoke_list, by = "ex_d_name_ind")

# check numbers test (one output row per district below the threshold)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_2 < 10) %>%
  summarize(sum = n()) # 149 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_2 < 10) %>%
  summarize(sum = n()) # 0 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_2 < 10) %>%
  summarize(sum = n()) # 7 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_2 < 10) %>%
  summarize(sum = n())
# 171 districts


## count district cases of diabetes/hypertension/obesity/csmoke in lower SES category
# (rows with hh_wealth_quintile_groups_district == 0; columns get suffix _0)
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarize(
    diabetes_cases_0 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_0 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_0 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_0 = sum(csmoke_dbl, na.rm = TRUE)
  )


## merge by districts: add the lower-SES count to each outcome's dataset

## diabetes
diabetes_list <- case_list %>%
  dplyr::select(ex_d_name_ind, diabetes_cases_0)
diabetes_analysis <- diabetes_analysis %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>%
  dplyr::select(ex_d_name_ind, hypertension_cases_0)
hypertension_analysis <- hypertension_analysis %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>%
  dplyr::select(ex_d_name_ind, obesity_cases_0)
obesity_analysis <- obesity_analysis %>%
  left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>%
  dplyr::select(ex_d_name_ind, csmoke_cases_0)
csmoke_analysis <- csmoke_analysis %>%
  left_join(csmoke_list, by = "ex_d_name_ind")


# check numbers test (one output row per district below the threshold)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_0 < 10) %>%
  summarize(sum = n()) # 330 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_0 < 10) %>%
  summarize(sum = n()) # 0 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_0 < 10) %>%
  summarize(sum = n()) # 139 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_0 < 10) %>%
  summarize(sum = n())
# 63 districts


# For each CVD risk factor: add the case counts of the lower (_0) and higher
# (_2) SES category per district and keep only districts whose combined count
# is at least 20.

## diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_0 + diabetes_cases_2) %>%
  dplyr::filter(sum_cases >= 20)


## hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_0 + hypertension_cases_2) %>%
  dplyr::filter(sum_cases >= 20)


## obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_0 + obesity_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

## csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_0 + csmoke_cases_2) %>%
  dplyr::filter(sum_cases >= 20)



## check number of districts that have not been dropped
## (each call prints one row per remaining district)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) # 373-> 608-373=235 rows removed from graphs
# pci:608-326=282 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) # 608->608-608=0 rows removed from graphs
# pci:608-146=462 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) # 589-> 608-589=19 rows removed from graphs
# pci:608-154=454 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) # 513-> 608-513=95 rows removed from graphs
# pci:608-221=387 districts







```


```{r Regression analysis district level glm}


#####diabetes

# Fit one Bayesian logistic regression (arm::bayesglm, logit link) per district:
# diabetes indicator on age group, urban, sex and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_broad_ind ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_diab_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as diab_f_coeff_log.
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(diab_f_coeff_log = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####hypertension

# Fit one Bayesian logistic regression (arm::bayesglm, logit link) per district:
# hypertension indicator on age group, urban, sex and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_broad_ind ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_htn_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as htn_f_coeff_log.
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(htn_f_coeff_log = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####obesity

# Fit one Bayesian logistic regression (arm::bayesglm, logit link) per district:
# bmigrt27.5 indicator on age group, urban, sex and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_bmi_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as bmi_f_coeff_log.
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(bmi_f_coeff_log = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


#####csmoke

# Fit one Bayesian logistic regression (arm::bayesglm, logit link) per district:
# csmoke indicator on age group, urban, sex and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_csmoke_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as csmoke_f_coeff_log.
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(csmoke_f_coeff_log = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


```

```{r Regression analysis district level lm}


#####diabetes

# Fit one linear regression per district: ex_diab_broad_ind_dbl on age group,
# sex, urban and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_broad_ind_dbl ~ age_grp + sex + urban + hh_wealth_quintile_groups_district,
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_diab_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as diab_f_coeff.
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(diab_f_coeff = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




#####hypertension

# Fit one linear regression per district: ex_htn_broad_ind_dbl on age group,
# urban, sex and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_broad_ind_dbl ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_htn_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as htn_f_coeff.
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(htn_f_coeff = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


######obesity

# Fit one linear regression per district: bmigrt27.5_dbl on age group, urban,
# sex and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_bmi_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as bmi_f_coeff.
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(bmi_f_coeff = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



######csmoke

# Fit one linear regression per district: csmoke_dbl on age group, urban, sex
# and the district wealth group.
# `analysis` has one row per district with the fitted model in list column `mod`.
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    data = .
  ))

# Tidy every district model into one coefficient table. The model list column
# is mapped explicitly because recent broom releases no longer provide tidy()
# methods for the rowwise data frames that do() returns. The result is grouped
# by district, as the rowwise tidier's output was.
coeffs_csmoke_f <- tibble::tibble(
  ex_d_name_ind = analysis$ex_d_name_ind,
  coefs = purrr::map(analysis$mod, broom::tidy)
) %>%
  tidyr::unnest(coefs) %>%
  group_by(ex_d_name_ind)

# Keep only the coefficient of the `hh_wealth_quintile_groups_district2` term
# (level 2 of the wealth-group variable vs. its reference level; presumably the
# higher-SES group -- confirm) and store it as csmoke_f_coeff.
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_groups_district2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(csmoke_f_coeff = estimate)

# Attach the district-level estimate to Ana1 by district name.
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




```









```{r graphs: filter: one value/district }

#Ana1_DHS_w_g<-Ana1
# NOTE(review): Ana1_DHS_w_g is read below but its only assignment in this
# chunk is the commented-out line above -- confirm it is created or loaded
# elsewhere before this chunk runs.

# One row per district: the first row of each district, reduced to the
# district name, the eight wealth coefficients and the district covariates.
Ana2 <- Ana1_DHS_w_g %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  ungroup()

# One row per district x urban_lab combination (so up to two rows per
# district), carrying the coefficients plus medianai_r_u and the urban flags.
Ana2_urban_rural <- Ana1_DHS_w_g %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  ungroup()


```
###### Graphs
```{r graphs:educational attainment}

## Shared helpers for the district-level scatter plots in this chunk and in the
## PCI and female-literacy chunks below.

# Outcome prefixes in plotting order (diabetes, hypertension, bmi, csmoke).
# For each prefix, Ana2 holds <prefix>_f_coeff (district lm wealth coefficient)
# and <prefix>_f_coeff_log (district bayesglm wealth coefficient).
cvd_outcomes <- c("diab", "htn", "bmi", "csmoke")

# Scatter plot (jittered points + least-squares line) of one outcome's district
# wealth coefficient against a district covariate.
#
# data       data frame with one row per district (Ana2)
# outcome    outcome prefix, one of cvd_outcomes
# x_col      name of the covariate column plotted on the x axis
# x_lab      x axis title
# x_scale    ggplot2 x scale (breaks / limits / labels) to add to the plot
# ratio      aspect ratio passed to coord_fixed()
# odds_ratio FALSE: plot <outcome>_f_coeff * 100 ("Difference in percentage
#            points"); TRUE: plot <outcome>_f_coeff_log on an axis labelled
#            with odds ratios
#
# Returns the ggplot object; the caller prints it.
plot_wealth_gradient <- function(data, outcome, x_col, x_lab, x_scale, ratio,
                                 odds_ratio = FALSE) {
  if (odds_ratio) {
    y_values <- data[[paste0(outcome, "_f_coeff_log")]]
    y_lab <- "Odds Ratio"
    # The coefficients are on the log-odds scale: breaks sit at the natural
    # logs of 0.05, 0.2, 1, 5 and 20 and are labelled with those odds ratios.
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
    )
  } else {
    y_values <- data[[paste0(outcome, "_f_coeff")]] * 100
    y_lab <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
  }

  plot_data <- data.frame(x = data[[x_col]], y = y_values)
  point_aes <- aes(x = x, y = y)

  ggplot(plot_data) +
    geom_jitter(mapping = point_aes, size = 0.3) +
    geom_smooth(mapping = point_aes, method = "lm", color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = x_lab, y = y_lab, fill = "") +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      #legend.text=element_text(size=10),
      #legend.title = element_text(size=8,face="bold"),
      #legend.key.size = unit(0.45,"cm"),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    x_scale +
    coord_fixed(ratio = ratio, expand = FALSE)
}

# Linear regression of the plotted quantity on the covariate, i.e. the same
# y ~ x relationship that geom_smooth(method = "lm") draws.
# Arguments as in plot_wealth_gradient(); returns the fitted lm object.
fit_wealth_gradient <- function(data, outcome, x_col, odds_ratio = FALSE) {
  y_term <- if (odds_ratio) {
    paste0(outcome, "_f_coeff_log")
  } else {
    paste0(outcome, "_f_coeff * 100")
  }
  model_formula <- as.formula(paste0("(", y_term, ") ~ ", x_col))
  # do.call() stores the formula itself in the model call, so summary() prints
  # the actual model formula instead of the name of a local variable.
  do.call("lm", list(formula = model_formula, data = quote(data)))
}


## Wealth coefficients vs. educational attainment (ed_att_new)
## Per outcome: absolute difference first, then odds ratio; each figure is
## followed by the summary of the matching linear model.

ed_x_scale <- scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110))

for (outcome in cvd_outcomes) {
  for (odds_ratio in c(FALSE, TRUE)) {
    fig <- plot_wealth_gradient(
      Ana2, outcome,
      x_col = "ed_att_new",
      x_lab = "% of participants in a district who completed primary education",
      x_scale = ed_x_scale,
      ratio = if (odds_ratio) 15 / 2 else 12 / 10,
      odds_ratio = odds_ratio
    )
    print(fig)

    mod <- fit_wealth_gradient(Ana2, outcome, "ed_att_new", odds_ratio)
    print(summary(mod))
  }
}


```

```{r graphs:PCI_districts }


## Wealth coefficients vs. PCI_districts
## Per outcome: absolute difference first, then odds ratio; each figure is
## followed by the summary of the matching linear model.
## Uses cvd_outcomes, plot_wealth_gradient() and fit_wealth_gradient() from the
## educational attainment chunk.

pci_x_scale <- scale_x_continuous(
  breaks = c(20000, 40000, 60000),
  limits = c(0, 80000),
  labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
)

for (outcome in cvd_outcomes) {
  for (odds_ratio in c(FALSE, TRUE)) {
    fig <- plot_wealth_gradient(
      Ana2, outcome,
      x_col = "PCI_districts",
      x_lab = "GDP/capita (Rs)",
      x_scale = pci_x_scale,
      ratio = if (odds_ratio) 1 / 0.00015 else 1 / 0.0010,
      odds_ratio = odds_ratio
    )
    print(fig)

    mod <- fit_wealth_gradient(Ana2, outcome, "PCI_districts", odds_ratio)
    print(summary(mod))
  }
}



```

```{r graphs:Female literacy rate }


## Wealth coefficients vs. female literacy rate (literacy_rate_female)
## Per outcome: absolute difference first, then odds ratio; each figure is
## followed by the summary of the matching linear model.
## Uses cvd_outcomes, plot_wealth_gradient() and fit_wealth_gradient() from the
## educational attainment chunk.

literacy_x_scale <- scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110))

for (outcome in cvd_outcomes) {
  for (odds_ratio in c(FALSE, TRUE)) {
    fig <- plot_wealth_gradient(
      Ana2, outcome,
      x_col = "literacy_rate_female",
      x_lab = "Female literacy rate",
      x_scale = literacy_x_scale,
      ratio = if (odds_ratio) 15 / 2 else 12 / 10,
      odds_ratio = odds_ratio
    )
    print(fig)

    mod <- fit_wealth_gradient(Ana2, outcome, "literacy_rate_female", odds_ratio)
    print(summary(mod))
  }
}


```



```{r graphs: Median household wealth }


# Split the urban/rural data so a separate trend model can be fitted to each.
urban <- dplyr::filter(Ana2_urban_rural, urban == 1)
rural <- dplyr::filter(Ana2_urban_rural, urban == 0)

# Facet strip labels, keyed by the values of `urban_lab`.
labels <- c(urban = "Urban", rural = "Rural")

# Jittered scatter of the mapped y variable against the mapped x variable with
# an lm trend line, faceted by `urban_lab`, on the fixed axes used for the
# median-household-wealth figures.
#   data    data frame to plot (must contain `urban_lab`)
#   mapping aes() with the x and y aesthetics (shared by points and trend line)
#   type    "difference": y axis in percentage points;
#           "odds_ratio": y axis on the log-odds scale, with ticks at
#           log(0.05), log(0.2), 0, log(5), log(20) labelled as odds ratios
# Returns the ggplot object (not printed).
# Note: the limits are set on the scales, so ggplot2 turns out-of-range values
# into NA before geom_smooth() fits. The drawn lines can therefore differ from
# the lm() fits below if any point lies outside the limits.
plot_by_wealth <- function(data, mapping,
                           type = c("difference", "odds_ratio")) {
  type <- match.arg(type)
  if (type == "difference") {
    x_lab <- "Median Household wealth"
    y_lab <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    ratio <- 1 / 8
  } else {
    x_lab <- "Median household wealth"
    y_lab <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    ratio <- 10 / 12
  }

  ggplot(data) +
    geom_jitter(mapping = mapping, size = 0.3) +
    geom_smooth(mapping = mapping, method = "lm", color = "gray48",
                se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = x_lab, y = y_lab, fill = "") +
    facet_wrap(~urban_lab, labeller = labeller(urban_lab = labels)) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold",
                                  margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold",
                                  margin = margin(r = 20)),
      panel.spacing = unit(2, "lines"),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.text.x = element_text(size = 20, face = "bold"),
      strip.text.y = element_text(size = 20, face = "bold"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(-1.5, 0, 1.5), limits = c(-2.5, 2.5)) +
    coord_fixed(ratio = ratio, expand = FALSE)
}

## Diabetes

# absolute difference

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = diab_f_coeff * 100, x = medianai_r_u),
  type = "difference"
)
print(fig)

mod_u <- lm((diab_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((diab_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = diab_f_coeff_log, x = medianai_r_u),
  type = "odds_ratio"
)
print(fig)

mod_u <- lm((diab_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((diab_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


## Hypertension

# absolute difference

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = htn_f_coeff * 100, x = medianai_r_u),
  type = "difference"
)
print(fig)

mod_u <- lm((htn_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((htn_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = htn_f_coeff_log, x = medianai_r_u),
  type = "odds_ratio"
)
print(fig)

mod_u <- lm((htn_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((htn_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


## bmi

# absolute difference

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = bmi_f_coeff * 100, x = medianai_r_u),
  type = "difference"
)
print(fig)

mod_u <- lm((bmi_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((bmi_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = bmi_f_coeff_log, x = medianai_r_u),
  type = "odds_ratio"
)
print(fig)

mod_u <- lm((bmi_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((bmi_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


## csmoke

# absolute difference

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = csmoke_f_coeff * 100, x = medianai_r_u),
  type = "difference"
)
print(fig)

mod_u <- lm((csmoke_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

# Rural model: stored in mod_r (it was previously assigned to mod_u, which
# overwrote the urban model fitted just above).
mod_r <- lm((csmoke_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_by_wealth(
  Ana2_urban_rural,
  aes(y = csmoke_f_coeff_log, x = medianai_r_u),
  type = "odds_ratio"
)
print(fig)

mod_u <- lm((csmoke_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((csmoke_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


```








```{r graphs: urban prop}


# Jittered scatter of the mapped y variable against the mapped x variable with
# an lm trend line, on the fixed axes used for the urban-proportion figures.
#   data    data frame to plot
#   mapping aes() with the x and y aesthetics (shared by points and trend line)
#   type    "difference": y axis in percentage points;
#           "odds_ratio": y axis on the log-odds scale, with ticks at
#           log(0.05), log(0.2), 0, log(5), log(20) labelled as odds ratios
# Returns the ggplot object (not printed).
# Note: the limits are set on the scales, so ggplot2 turns out-of-range values
# into NA before geom_smooth() fits. The drawn line can therefore differ from
# the lm() fits below if any point lies outside the limits.
plot_by_urban_prop <- function(data, mapping,
                               type = c("difference", "odds_ratio")) {
  type <- match.arg(type)
  if (type == "difference") {
    y_lab <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    ratio <- 12 / 7.5
  } else {
    y_lab <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    ratio <- 12 / 1.14
  }

  ggplot(data) +
    geom_jitter(mapping = mapping, size = 0.3) +
    geom_smooth(mapping = mapping, method = "lm", color = "gray48",
                se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "% of participants who live in an urban area", y = y_lab,
         fill = "") +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold",
                                  margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold",
                                  margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(0, 125)) +
    coord_fixed(ratio = ratio, expand = FALSE)
}

## Diabetes

# absolute difference

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = diab_f_coeff * 100, x = urban_prop),
  type = "difference"
)
print(fig)

mod <- lm((diab_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = diab_f_coeff_log, x = urban_prop),
  type = "odds_ratio"
)
print(fig)

mod <- lm((diab_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)


## Hypertension

# absolute difference

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = htn_f_coeff * 100, x = urban_prop),
  type = "difference"
)
print(fig)

mod <- lm((htn_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = htn_f_coeff_log, x = urban_prop),
  type = "odds_ratio"
)
print(fig)

mod <- lm((htn_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)


## bmi

# absolute difference

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = bmi_f_coeff * 100, x = urban_prop),
  type = "difference"
)
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = bmi_f_coeff_log, x = urban_prop),
  type = "odds_ratio"
)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)


## csmoke

# absolute difference

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = csmoke_f_coeff * 100, x = urban_prop),
  type = "difference"
)
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_by_urban_prop(
  Ana2,
  aes(y = csmoke_f_coeff_log, x = urban_prop),
  type = "odds_ratio"
)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)

```



# District-level regressions using wealth quintiles computed nationally: bottom two vs. top two quintiles

##### Linear and logistic regressions
```{r filter districts with no contrasts}


# Analysis data set: keep only the columns used by the district-level models,
# then keep rows whose urban_prop and rural_prop are both at least 5.
Ana1 <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_broad_ind, ex_htn_broad_ind, bmigrt27.5, csmoke,
    ex_diab_broad_ind_dbl, ex_htn_broad_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, urban_lab,
    wealth_quintile_rurb_groups,
    urban_prop, rural_prop,
    PCI_districts, literacy_rate_female, ed_att_new, medianai_r_u
  ) %>%
  dplyr::filter(urban_prop >= 5, rural_prop >= 5)


 
# Drop districts with <50 observations in the low (group 0) or high (group 2)
# SES category.


## Sanity check before filtering: number of districts, and district/SES-group
## cells with fewer than 50 rows.
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 608 districts
test <- Ana1 %>%
  group_by(ex_d_name_ind, wealth_quintile_rurb_groups) %>%
  summarize(sum = n())
test <- dplyr::filter(test, sum < 50)
# NOTE(review): a district with no rows at all in a category has no row in
# `test`, so it cannot show up here even though the count filter below drops
# it. That may be why this check reports 0 while 17 districts are removed.
# Confirm.
dplyr::filter(
  test,
  wealth_quintile_rurb_groups == 0 | wealth_quintile_rurb_groups == 2
) %>%
  summarize(sum = n())
# 0 districts


# Indicator columns: 1 if the row belongs to SES group 0 (resp. 2), 0 if it
# belongs to another group, NA if the group is missing.
Ana1 <- mutate(
  Ana1,
  wq_0_o = ifelse(is.na(wealth_quintile_rurb_groups), NA,
                  ifelse(wealth_quintile_rurb_groups == 0, 1, 0)),
  wq_2_o = ifelse(is.na(wealth_quintile_rurb_groups), NA,
                  ifelse(wealth_quintile_rurb_groups == 2, 1, 0))
)

# Per-district number of rows in each of the two SES groups, attached to every
# row of the district.
Ana1 <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_0_o = sum(wq_0_o, na.rm = TRUE),
    count_wq_2_o = sum(wq_2_o, na.rm = TRUE)
  )

## Keep districts with >=50 rows in both groups.
Ana1 <- Ana1 %>%
  dplyr::filter(count_wq_0_o >= 50 & count_wq_2_o >= 50) %>%
  ungroup()

## Check numbers: one output row per remaining district.
Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
# 591 (608-17)



# Drop districts with too few cases in lower & higher SES category, per CVD
# risk factor and district.


## Exploratory check only: the objects created here (test, test_d, test_h,
## test_o, test_c) are not used by the filtering further down in this chunk.
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 591 districts

# Number of cases of each risk factor per district and SES group.
test <- test %>%
  group_by(ex_d_name_ind, wealth_quintile_rurb_groups) %>%
  summarize(
    diabetes_cases = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

# District/SES-group cells with fewer than 10 cases; each summary below has one
# row per affected district.
test_d <- dplyr::filter(test, diabetes_cases < 10)
test_d %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 205 districts
test_d %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 320 districts

test_h <- dplyr::filter(test, hypertension_cases < 10)
test_h %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 1 districts
test_h %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 11 districts

test_o <- dplyr::filter(test, obesity_cases < 10)
test_o %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 26 districts
test_o %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 117 districts

test_c <- dplyr::filter(test, csmoke_cases < 10)
test_c %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 225 districts
test_c %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 129 districts






## Per-district case counts of diabetes/hypertension/obesity/csmoke in the
## higher SES category (group 2).
case_list <- Ana1 %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  group_by(ex_d_name_ind) %>%
  summarize(
    diabetes_cases_2 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_2 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_2 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_2 = sum(csmoke_dbl, na.rm = TRUE)
  )


## Attach the group-2 counts to the individual-level data, creating one
## analysis data set per risk factor.

## diabetes
diabetes_list <- case_list %>%
  dplyr::select(ex_d_name_ind, diabetes_cases_2)
diabetes_analysis <- Ana1 %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>%
  dplyr::select(ex_d_name_ind, hypertension_cases_2)
hypertension_analysis <- Ana1 %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>%
  dplyr::select(ex_d_name_ind, obesity_cases_2)
obesity_analysis <- Ana1 %>%
  left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>%
  dplyr::select(ex_d_name_ind, csmoke_cases_2)
csmoke_analysis <- Ana1 %>%
  left_join(csmoke_list, by = "ex_d_name_ind")

# Check numbers: one output row per district with <10 group-2 cases.
diabetes_analysis %>%
  dplyr::filter(diabetes_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n()) # 205 districts
hypertension_analysis %>%
  dplyr::filter(hypertension_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n()) # 1 districts
obesity_analysis %>%
  dplyr::filter(obesity_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n()) # 26 districts
csmoke_analysis %>%
  dplyr::filter(csmoke_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 225 districts


## Per-district case counts of diabetes/hypertension/obesity/csmoke in the
## lower SES category (group 0). This overwrites case_list and the *_list
## objects created above.
case_list <- Ana1 %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  group_by(ex_d_name_ind) %>%
  summarize(
    diabetes_cases_0 = sum(ex_diab_broad_ind_dbl, na.rm = TRUE),
    hypertension_cases_0 = sum(ex_htn_broad_ind_dbl, na.rm = TRUE),
    obesity_cases_0 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_0 = sum(csmoke_dbl, na.rm = TRUE)
  )


## Attach the group-0 counts to the per-risk-factor analysis data sets.

## diabetes
diabetes_list <- case_list %>%
  dplyr::select(ex_d_name_ind, diabetes_cases_0)
diabetes_analysis <- diabetes_analysis %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>%
  dplyr::select(ex_d_name_ind, hypertension_cases_0)
hypertension_analysis <- hypertension_analysis %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>%
  dplyr::select(ex_d_name_ind, obesity_cases_0)
obesity_analysis <- obesity_analysis %>%
  left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>%
  dplyr::select(ex_d_name_ind, csmoke_cases_0)
csmoke_analysis <- csmoke_analysis %>%
  left_join(csmoke_list, by = "ex_d_name_ind")


# Check numbers: one output row per district with <10 group-0 cases.
diabetes_analysis %>%
  dplyr::filter(diabetes_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n()) # 320 districts
hypertension_analysis %>%
  dplyr::filter(hypertension_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n()) # 11 districts
obesity_analysis %>%
  dplyr::filter(obesity_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n()) # 117 districts
csmoke_analysis %>%
  dplyr::filter(csmoke_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 129 districts


# For each CVD risk factor, add up the cases in the higher and lower SES
# category per district and drop districts whose combined count is <20.
# Rows whose sum is NA (a count is missing) are dropped by the filter as well.

## diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_0 + diabetes_cases_2) %>%
  dplyr::filter(sum_cases >= 20)


## hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_0 + hypertension_cases_2) %>%
  dplyr::filter(sum_cases >= 20)


## obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_0 + obesity_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

## csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_0 + csmoke_cases_2) %>%
  dplyr::filter(sum_cases >= 20)



## Check the number of districts that have not been dropped: each result has
## one row per remaining district.
diabetes_analysis %>%
  dplyr::distinct(ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  summarize(n = n()) # 368 -> 591-368=223 rows removed from graphs
hypertension_analysis %>%
  dplyr::distinct(ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  summarize(n = n()) # 591 -> 591-591=0 rows removed from graphs
obesity_analysis %>%
  dplyr::distinct(ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  summarize(n = n()) # 573 -> 591-573=18 rows removed from graphs
csmoke_analysis %>%
  dplyr::distinct(ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  summarize(n = n()) # 491 -> 591-491=100 rows removed from graphs







```

```{r Regression analysis district level glm}


# Turn the result of `group_by(ex_d_name_ind) %>% do(mod = <fit>)` (one row per
# district, fitted model in list column `mod`) into one long coefficient table:
# the broom::tidy() output of every model, plus an `ex_d_name_ind` column.
# This replaces `analysis %>% tidy(mod)`, which depends on broom's tidiers for
# rowwise data frames; those were deprecated and later removed from broom.
tidy_district_models <- function(models) {
  per_district <- lapply(seq_len(nrow(models)), function(i) {
    dplyr::mutate(
      broom::tidy(models$mod[[i]]),
      ex_d_name_ind = models$ex_d_name_ind[i]
    )
  })
  dplyr::bind_rows(per_district)
}

# In every section below the coefficient kept is the term
# "wealth_quintile_rurb_groups2" (SES group 2; the earlier chunk calls group 2
# the higher and group 0 the lower SES category). It is joined onto Ana1 by
# district as <outcome>_f_coeff_log.
# NOTE(review): presumably group 0 is the reference level. Confirm against the
# factor definition of wealth_quintile_rurb_groups.


##### diabetes

# logistic regression (bayesglm, logit link), one model per district
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_broad_ind ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

# extract coefficients with broom
coeffs_diab_f <- tidy_district_models(analysis)

# keep the SES group 2 coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, diab_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



##### hypertension

# logistic regression (bayesglm, logit link), one model per district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_broad_ind ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

# extract coefficients with broom
coeffs_htn_f <- tidy_district_models(analysis)

# keep the SES group 2 coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, htn_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



##### obesity

# logistic regression (bayesglm, logit link), one model per district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

# extract coefficients with broom
coeffs_bmi_f <- tidy_district_models(analysis)

# keep the SES group 2 coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, bmi_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


##### csmoke

# logistic regression (bayesglm, logit link), one model per district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

# extract coefficients with broom
coeffs_csmoke_f <- tidy_district_models(analysis)

# keep the SES group 2 coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, csmoke_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


```

```{r Regression analysis district level lm}


# District-level linear probability models: for each outcome, one lm() per
# district; the wealth coefficient of each district is merged into Ana1.
#
# Coefficient tables are built by tidying each model and unnesting, instead
# of calling tidy() on the rowwise do() result: broom's rowwise data-frame
# tidiers were deprecated and are no longer available in recent releases.
#
# NOTE(review): the original comments said "wealth factor 5", but the term
# selected is level "2" of wealth_quintile_rurb_groups -- presumably a
# two-group recode of the quintiles; confirm against its definition.

##### diabetes

# linear regression grouped by district
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_broad_ind_dbl ~ age_grp + sex + urban + wealth_quintile_rurb_groups,
    data = .
  ))

# extract coefficient tables
coeffs_diab_f <- analysis %>%
  ungroup() %>%
  dplyr::mutate(tidied = purrr::map(mod, broom::tidy)) %>%
  dplyr::select(-mod) %>%
  tidyr::unnest(tidied)

# keep the wealth coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "wealth_quintile_rurb_groups2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(diab_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


##### hypertension

# linear regression grouped by district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_broad_ind_dbl ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    data = .
  ))

# extract coefficient tables
coeffs_htn_f <- analysis %>%
  ungroup() %>%
  dplyr::mutate(tidied = purrr::map(mod, broom::tidy)) %>%
  dplyr::select(-mod) %>%
  tidyr::unnest(tidied)

# keep the wealth coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "wealth_quintile_rurb_groups2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(htn_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


###### obesity

# linear regression grouped by district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    data = .
  ))

# extract coefficient tables
coeffs_bmi_f <- analysis %>%
  ungroup() %>%
  dplyr::mutate(tidied = purrr::map(mod, broom::tidy)) %>%
  dplyr::select(-mod) %>%
  tidyr::unnest(tidied)

# keep the wealth coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "wealth_quintile_rurb_groups2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(bmi_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


###### csmoke

# linear regression grouped by district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    data = .
  ))

# extract coefficient tables
coeffs_csmoke_f <- analysis %>%
  ungroup() %>%
  dplyr::mutate(tidied = purrr::map(mod, broom::tidy)) %>%
  dplyr::select(-mod) %>%
  tidyr::unnest(tidied)

# keep the wealth coefficient and merge
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "wealth_quintile_rurb_groups2")

coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  dplyr::rename(csmoke_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




```









```{r graphs: filter: one value/district }

# Ana1_DHS_w_n_g <- Ana1

# District-level table: first row of every district, reduced to the wealth
# coefficients and the district indicators used in the graphs.
# (select() on grouped data keeps the grouping column ex_d_name_ind.)
Ana2 <- Ana1_DHS_w_n_g %>%
  group_by(ex_d_name_ind) %>%
  dplyr::slice(1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  ungroup()

# Same idea, but one row per district x urban/rural stratum.
Ana2_urban_rural <- Ana1_DHS_w_n_g %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  dplyr::slice(1) %>%
  ungroup()


```
###### Graphs
```{r graphs:educational attainment}

# Scatter plots of the district-level wealth coefficients against district
# educational attainment (ed_att_new), each followed by the matching simple
# linear regression. All eight figures share one layout, built by the helper
# below; only the y aesthetic and the y-axis type differ.

# Build one figure.
#   data       data frame with one row per district (here: Ana2)
#   mapping    aes() mapping with x = ed_att_new and y = coefficient to plot
#   odds_ratio FALSE: y is a difference in percentage points;
#              TRUE:  y is a log-odds coefficient, the axis breaks sit at
#                     log(0.05), log(0.2), 0, log(5), log(20) and are
#                     labelled with the corresponding odds ratios.
# Returns the ggplot object (not printed).
plot_coeff_by_education <- function(data, mapping, odds_ratio = FALSE) {
  if (odds_ratio) {
    y_label <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c((-2.996), (-1.609), 0, 1.609, 2.996),
      limits = c((-5.3), 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
    )
    aspect_ratio <- 15 / 2
  } else {
    y_label <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c((-20), (-10), 0, 10, 20),
      limits = c((-33), 33)
    )
    aspect_ratio <- 12 / 10
  }

  ggplot(data) +
    geom_jitter(mapping = mapping, size = 0.3) +
    geom_smooth(
      mapping = mapping, method = "lm",
      color = "gray48", se = FALSE, size = 0.7
    ) +
    theme_classic() +
    labs(
      x = "% of participants in a district who completed primary education",
      y = y_label,
      fill = ""
    ) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = aspect_ratio, expand = FALSE)
}


## Diabetes

# absolute difference
fig <- plot_coeff_by_education(Ana2, aes(y = diab_f_coeff * 100, x = ed_att_new))
print(fig)

mod <- lm((diab_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_coeff_by_education(
  Ana2, aes(y = diab_f_coeff_log, x = ed_att_new),
  odds_ratio = TRUE
)
print(fig)

# (the original fitted and summarised this model twice; once is enough)
mod <- lm((diab_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## hypertension

# absolute difference
fig <- plot_coeff_by_education(Ana2, aes(y = htn_f_coeff * 100, x = ed_att_new))
print(fig)

mod <- lm((htn_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_coeff_by_education(
  Ana2, aes(y = htn_f_coeff_log, x = ed_att_new),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((htn_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## bmi

# absolute difference
fig <- plot_coeff_by_education(Ana2, aes(y = bmi_f_coeff * 100, x = ed_att_new))
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_coeff_by_education(
  Ana2, aes(y = bmi_f_coeff_log, x = ed_att_new),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## csmoke

# absolute difference
fig <- plot_coeff_by_education(Ana2, aes(y = csmoke_f_coeff * 100, x = ed_att_new))
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- plot_coeff_by_education(
  Ana2, aes(y = csmoke_f_coeff_log, x = ed_att_new),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


```

















# Multilevel Models




```{r select variables for multilevel modeling }

# Reduce India_DHS to the columns used by the multilevel models:
# district id, outcomes, individual covariates and district indicators.
multilevel <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_broad_ind_dbl,
    ex_htn_narrow_ind_dbl, ex_htn_broad_ind_dbl,
    bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    hh_wealth_quintile_district, hh_wealth_quintile_district_c,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop, PCI_districts, literacy_rate_female,
    ed_att_new, medianai,
    ex_htn_broad_ind
  )

# Mixed-model packages. Load order is kept: lmerTest is attached last.
library(broom.mixed) # tidy() methods for mixed models
library(lme4)
library(lmerTest)

```

```{r convert categorical varibales with >1 level in binary variables& perform Grand Mean Centering for continuous varibales}

# Recode categorical predictors into 0/1 indicator columns and rescale the
# continuous district-level indicators.
#
# Indicators are built as as.numeric(x == level): 1 when the row has that
# level, 0 otherwise, and NA when x is missing (a comparison with NA is NA).
# This replaces the nested ifelse(is.na(x)==T, NA, ifelse(...)) form and
# always yields a numeric column.

# Centre x on its mean and divide by two standard deviations (NAs ignored).
# NOTE(review): mean and sd are taken over all rows of `multilevel`; if that
# is one row per participant, districts are weighted by their sample size --
# confirm this is intended for district-level indicators.
scale_2sd <- function(x) {
  (x - mean(x, na.rm = TRUE)) / (2 * sd(x, na.rm = TRUE))
}

##################### individual level vars
# age_group:

multilevel <- mutate(multilevel,
  agr_15_19 = as.numeric(age_grp == "15-19"),
  agr_20_24 = as.numeric(age_grp == "20-24"),
  agr_25_29 = as.numeric(age_grp == "25-29"),
  agr_30_34 = as.numeric(age_grp == "30-34"),
  agr_35_39 = as.numeric(age_grp == "35-39"),
  agr_40_44 = as.numeric(age_grp == "40-44"),
  agr_45_49 = as.numeric(age_grp == "45-49"),
  agr_50_54 = as.numeric(age_grp == "50-54")
)

# district household_wealth_quintile:

multilevel <- mutate(multilevel,
  wq_1 = as.numeric(hh_wealth_quintile_district == 1),
  wq_2 = as.numeric(hh_wealth_quintile_district == 2),
  wq_3 = as.numeric(hh_wealth_quintile_district == 3),
  wq_4 = as.numeric(hh_wealth_quintile_district == 4),
  wq_5 = as.numeric(hh_wealth_quintile_district == 5)
)

# check
summary(as.factor(multilevel$wq_1))
class(multilevel$wq_1)
summary(multilevel$hh_wealth_quintile_district)

# national household wealth quintile
multilevel <- mutate(multilevel,
  wq_1_n = as.numeric(wealth_quintile_rurb == 1),
  wq_2_n = as.numeric(wealth_quintile_rurb == 2),
  wq_3_n = as.numeric(wealth_quintile_rurb == 3),
  wq_4_n = as.numeric(wealth_quintile_rurb == 4),
  wq_5_n = as.numeric(wealth_quintile_rurb == 5)
)

# sex (copied unchanged; the "_center" suffix only matches the naming of the
# other model covariates, no centering is applied)
multilevel <- mutate(multilevel,
  sex_center = sex
)

# urban (copied unchanged, see sex)
multilevel <- mutate(multilevel,
  urban_center = urban
)

# education (indicators for ed_5 codes 1, 3, 4, 5 and 6)
multilevel <- mutate(multilevel,
  ed_1_o = as.numeric(ed_5 == 1),
  ed_3_o = as.numeric(ed_5 == 3),
  ed_4_o = as.numeric(ed_5 == 4),
  ed_5_o = as.numeric(ed_5 == 5),
  ed_6_o = as.numeric(ed_5 == 6)
)

# check
summary(as.factor(multilevel$ed_4_o))
class(multilevel$ed_4_o)
summary(multilevel$ed_5)


###################### District level variables vars

# median wealth # continuous -> scaled
multilevel <- mutate(multilevel,
  medianai_center = scale_2sd(medianai)
)

## urban_prop # continuous -> scaled
multilevel <- mutate(multilevel,
  urban_prop_center = scale_2sd(urban_prop)
)

# educational attainment new # continuous -> scaled
multilevel <- mutate(multilevel,
  ed_att_new_center = scale_2sd(ed_att_new)
)

## PCI_districts # continuous -> scaled
# converted to numeric first, then scaled
multilevel$PCI_districts <- as.numeric(multilevel$PCI_districts)
multilevel <- mutate(multilevel,
  PCI_district_center = scale_2sd(PCI_districts)
)

# literacy rate female # continuous -> scaled
multilevel <- mutate(multilevel,
  literacy_rate_female_center = scale_2sd(literacy_rate_female)
)




```



```{r Correlation of district level indicators}

# Correlation of district level indicator variables (NFHS-4)
#
# For every ordered pair of the five scaled district indicators, regress one
# on the other with lm() and write the tidy coefficient table (with
# confidence intervals) to a CSV file.

districts_indicator_test <- dplyr::select(
  multilevel,
  ex_d_name_ind, literacy_rate_female_center, PCI_district_center,
  ed_att_new_center, urban_prop_center, medianai_center
)

# keep the first row of every district
districts_indicator_test <- districts_indicator_test[!duplicated(districts_indicator_test$ex_d_name_ind), ]

# outcome, predictor and output file of each model, in the original order.
# NOTE(review): the last file is named "mod_lf_urban_DHS.csv" while the other
# models with female literacy as outcome use the "mod_fl_" prefix. The name
# is kept as it was so that downstream readers of the file are not broken.
indicator_models <- tibble::tribble(
  ~outcome,                      ~predictor,                    ~file,
  #### educational attainment ####
  "ed_att_new_center",           "medianai_center",             "mod_edatt_med_DHS.csv",
  "ed_att_new_center",           "PCI_district_center",         "mod_edatt_pci_DHS.csv",
  "ed_att_new_center",           "urban_prop_center",           "mod_edatt_urban_DHS.csv",
  "ed_att_new_center",           "literacy_rate_female_center", "mod_edatt_lf_DHS.csv",
  #### medianai ####
  "medianai_center",             "ed_att_new_center",           "mod_med_edatt_DHS.csv",
  "medianai_center",             "PCI_district_center",         "mod_med_pci_DHS.csv",
  "medianai_center",             "urban_prop_center",           "mod_med_urban_DHS.csv",
  "medianai_center",             "literacy_rate_female_center", "mod_med_lf_DHS.csv",
  #### PCI districts ####
  "PCI_district_center",         "ed_att_new_center",           "mod_pci_edatt_DHS.csv",
  "PCI_district_center",         "medianai_center",             "mod_pci_med_DHS.csv",
  "PCI_district_center",         "urban_prop_center",           "mod_pci_urban_DHS.csv",
  "PCI_district_center",         "literacy_rate_female_center", "mod_pci_lf_DHS.csv",
  #### urban prop ####
  "urban_prop_center",           "ed_att_new_center",           "mod_urban_edatt_DHS.csv",
  "urban_prop_center",           "medianai_center",             "mod_urban_med_DHS.csv",
  "urban_prop_center",           "PCI_district_center",         "mod_urban_pci_DHS.csv",
  "urban_prop_center",           "literacy_rate_female_center", "mod_urban_lf_DHS.csv",
  #### literacy rate female ####
  "literacy_rate_female_center", "ed_att_new_center",           "mod_fl_edatt_DHS.csv",
  "literacy_rate_female_center", "medianai_center",             "mod_fl_med_DHS.csv",
  "literacy_rate_female_center", "PCI_district_center",         "mod_fl_pci_DHS.csv",
  "literacy_rate_female_center", "urban_prop_center",           "mod_lf_urban_DHS.csv"
)

# Fit and export each model. `mod` and `coeffs` are assigned at top level, so
# after the loop they hold the last model and its table, as before.
# The former method = "Wald" argument was dropped: tidy() for lm objects has
# no such argument and ignored it.
for (i in seq_len(nrow(indicator_models))) {
  mod <- lm(
    reformulate(indicator_models$predictor[i], response = indicator_models$outcome[i]),
    data = districts_indicator_test
  )
  coeffs <- mod %>% tidy(conf.int = TRUE)

  write.csv(coeffs, indicator_models$file[i])
}



```










###### Multilevel Model: wealth interaction (quintiles computed for each district)


```{r level 1}




# Level-1 models: each outcome regressed on the age-group, district wealth
# quintile, urban and sex indicators, with a random intercept per district.
#
# The fixed-effect rows are selected with effect == "fixed" rather than a
# hard-coded row range, and the p-values are taken from the coefficient
# table by column name ("Pr(>|t|)", provided by lmerTest) rather than by
# position, so the export keeps working if the set of terms changes.

# diabetes
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_1 <- dplyr::mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_1, "DHS_diab_1.csv")


# hypertension
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_1 <- dplyr::mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_htn_1, "DHS_htn_1.csv")


# bmi
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_1 <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_bmi_1, "DHS_bmi_1.csv")


# csmoke
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_1 <- dplyr::mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_1, "DHS_csmoke_1.csv")




```










```{r lmer analysis diabetes}


# Diabetes: cross-level interaction models. Each model adds one scaled
# district indicator and its interactions with the wealth-quintile
# indicators (wq_2 ... wq_5) to the level-1 specification, with a random
# intercept per district.
#
# The fixed-effect rows are selected with effect == "fixed" rather than a
# hard-coded row range, and the p-values are taken from the coefficient
# table by column name ("Pr(>|t|)", provided by lmerTest) rather than by
# position, so the export keeps working if the set of terms changes.

# median center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_medi <- dplyr::mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_medi, "DHS_diab_medianai_center.csv")


# PCI
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_pci <- dplyr::mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_pci, "DHS_diab_pci.csv")


## educational attainment
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_ed_att_new <- dplyr::mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_ed_att_new, "DHS_diab_ed_att_new.csv")


## urban prop
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_urban_prop <- dplyr::mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_urban_prop, "DHS_diab_urban_prop.csv")


# Female literacy rate
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_literacy_rate_female <- dplyr::mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_literacy_rate_female, "DHS_diab_literacy_rate_female.csv")





```





```{r lmer analysis hypertension}


# Hypertension: cross-level interaction models. Each model adds one scaled
# district indicator and its interactions with the wealth-quintile
# indicators (wq_2 ... wq_5) to the level-1 specification, with a random
# intercept per district.
#
# The fixed-effect rows are selected with effect == "fixed" rather than a
# hard-coded row range, and the p-values are taken from the coefficient
# table by column name ("Pr(>|t|)", provided by lmerTest) rather than by
# position, so the export keeps working if the set of terms changes.

# median center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_medi <- dplyr::mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_htn_medi, "DHS_htn_medianai_center.csv")


# PCI
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_pci <- dplyr::mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_htn_pci, "DHS_htn_pci.csv")


## educational attainment
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_ed_att_new <- dplyr::mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_htn_ed_att_new, "DHS_htn_ed_att_new.csv")


## urban prop
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_urban_prop <- dplyr::mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_htn_urban_prop, "DHS_htn_urban_prop.csv")


# Female literacy rate
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_literacy_rate_female <- dplyr::mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_htn_literacy_rate_female, "DHS_htn_literacy_rate_female.csv")




```

```{r lmer analysis obesity}


# Obesity (bmigrt27.5_dbl): cross-level interaction models. Each model adds
# one scaled district indicator and its interactions with the
# wealth-quintile indicators (wq_2 ... wq_5) to the level-1 specification,
# with a random intercept per district.
#
# The fixed-effect rows are selected with effect == "fixed" rather than a
# hard-coded row range, and the p-values are taken from the coefficient
# table by column name ("Pr(>|t|)", provided by lmerTest) rather than by
# position, so the export keeps working if the set of terms changes.

# median center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_medi <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_bmi_medi, "DHS_bmi_medianai_center.csv")


# PCI
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_pci <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_bmi_pci, "DHS_bmi_pci.csv")


## educational attainment
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_ed_att_new <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_bmi_ed_att_new, "DHS_bmi_ed_att_new.csv")


## urban prop
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_urban_prop <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_bmi_urban_prop, "DHS_bmi_urban_prop.csv")


# Female literacy rate
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_literacy_rate_female <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_bmi_literacy_rate_female, "DHS_bmi_literacy_rate_female.csv")




```



```{r lmer analysis currently smoking}


# Current smoking: cross-level interaction models. Each model adds one
# scaled district indicator and its interactions with the wealth-quintile
# indicators (wq_2 ... wq_5) to the level-1 specification, with a random
# intercept per district.
#
# The fixed-effect rows are selected with effect == "fixed" rather than a
# hard-coded row range, and the p-values are taken from the coefficient
# table by column name ("Pr(>|t|)", provided by lmerTest) rather than by
# position, so the export keeps working if the set of terms changes.

# median center
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_medi <- dplyr::mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "DHS_csmoke_medianai_center.csv")


# PCI
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- dplyr::mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "DHS_csmoke_pci.csv")


## educational attainment
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- dplyr::mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "DHS_csmoke_ed_att_new.csv")


## urban prop
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- dplyr::mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "DHS_csmoke_urban_prop.csv")




#Female literacy rate
 
csmoke_all_lmer<-lmer(formula =csmoke_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+wq_2+wq_3+wq_4+wq_5+urban_center+sex_center+literacy_rate_female_center+literacy_rate_female_center:wq_2+literacy_rate_female_center:wq_3+literacy_rate_female_center:wq_4+literacy_rate_female_center:wq_5+ (1|ex_d_name_ind), data=multilevel)

coeffs_csmoke_all <- csmoke_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_csmoke_all <-coeffs_csmoke_all[1:19,]
coeffs_csmoke_literacy_rate_female<-mutate(coeffs_csmoke_all,p.value=coef(summary(csmoke_all_lmer))[,5])

write.csv(coeffs_csmoke_literacy_rate_female,"DHS_csmoke_literacy_rate_female.csv")




```



###### Multilevel Model: education interaction



```{r level 1 education interaction}


# Models without an extra predictor: each outcome is regressed on the
# age-group dummies, the education dummies ed_3_o..ed_6_o, urban_center and
# sex_center, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:14, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# Outcome: ex_diab_broad_ind_dbl
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_1 <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_1, "ed_DHS_diab_1.csv")

# Outcome: ex_htn_broad_ind_dbl
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_1 <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_1, "ed_DHS_htn_1.csv")

# Outcome: bmigrt27.5_dbl
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_1 <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_1, "ed_DHS_bmi_1.csv")

# Outcome: csmoke_dbl
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
coeffs_csmoke_all <- coeffs_csmoke_all[seq_along(p_fixed), ]
coeffs_csmoke_1 <- dplyr::mutate(coeffs_csmoke_all, p.value = p_fixed)
write.csv(coeffs_csmoke_1, "ed_DHS_csmoke_1.csv")





```







```{r lmer analysis diabetes education interaction}


# ex_diab_broad_ind_dbl ~ age-group dummies + ed_3_o..ed_6_o + urban_center
# + sex_center + one extra predictor and its interactions with
# ed_3_o..ed_6_o, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:19, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# Extra predictor: medianai_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_medi <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_medi, "ed_DHS_diab_medianai_center.csv")

# Extra predictor: PCI_district_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_pci <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_pci, "ed_DHS_diab_pci.csv")

# Extra predictor: ed_att_new_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_ed_att_new <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_ed_att_new, "ed_DHS_diab_ed_att_new.csv")

# Extra predictor: urban_prop_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_urban_prop <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_urban_prop, "ed_DHS_diab_urban_prop.csv")

# Extra predictor: literacy_rate_female_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_literacy_rate_female <- dplyr::mutate(
  coeffs_diab_all,
  p.value = p_fixed
)
write.csv(
  coeffs_diab_literacy_rate_female,
  "ed_DHS_diab_literacy_rate_female.csv"
)




```





```{r lmer analysis hypertension education interaction}

# ex_htn_broad_ind_dbl ~ age-group dummies + ed_3_o..ed_6_o + urban_center
# + sex_center + one extra predictor and its interactions with
# ed_3_o..ed_6_o, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:19, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# Extra predictor: medianai_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_medi <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_medi, "ed_DHS_htn_medianai_center.csv")

# Extra predictor: PCI_district_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_pci <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_pci, "ed_DHS_htn_pci.csv")

# Extra predictor: ed_att_new_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_ed_att_new <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_ed_att_new, "ed_DHS_htn_ed_att_new.csv")

# Extra predictor: urban_prop_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_urban_prop <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_urban_prop, "ed_DHS_htn_urban_prop.csv")

# Extra predictor: literacy_rate_female_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_literacy_rate_female <- dplyr::mutate(
  coeffs_htn_all,
  p.value = p_fixed
)
write.csv(
  coeffs_htn_literacy_rate_female,
  "ed_DHS_htn_literacy_rate_female.csv"
)


```

```{r lmer analysis obesity education interaction}



# bmigrt27.5_dbl ~ age-group dummies + ed_3_o..ed_6_o + urban_center
# + sex_center + one extra predictor and its interactions with
# ed_3_o..ed_6_o, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:19, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# Extra predictor: medianai_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_medi <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_medi, "ed_DHS_bmi_medianai_center.csv")

# Extra predictor: PCI_district_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_pci <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_pci, "ed_DHS_bmi_pci.csv")

# Extra predictor: ed_att_new_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_ed_att_new <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_ed_att_new, "ed_DHS_bmi_ed_att_new.csv")

# Extra predictor: urban_prop_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_urban_prop <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_urban_prop, "ed_DHS_bmi_urban_prop.csv")

# Extra predictor: literacy_rate_female_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_literacy_rate_female <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = p_fixed
)
write.csv(
  coeffs_bmi_literacy_rate_female,
  "ed_DHS_bmi_literacy_rate_female.csv"
)



```
























```{r lmer analysis currently smoking education interaction}



# csmoke_dbl ~ age-group dummies + ed_3_o..ed_6_o + urban_center
# + sex_center + one extra predictor and its interactions with
# ed_3_o..ed_6_o, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:19, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# Extra predictor: medianai_center
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
coeffs_csmoke_all <- coeffs_csmoke_all[seq_along(p_fixed), ]
coeffs_csmoke_medi <- dplyr::mutate(coeffs_csmoke_all, p.value = p_fixed)
write.csv(coeffs_csmoke_medi, "ed_DHS_csmoke_medianai_center.csv")

# Extra predictor: PCI_district_center
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
coeffs_csmoke_all <- coeffs_csmoke_all[seq_along(p_fixed), ]
coeffs_csmoke_pci <- dplyr::mutate(coeffs_csmoke_all, p.value = p_fixed)
write.csv(coeffs_csmoke_pci, "ed_DHS_csmoke_pci.csv")

# Extra predictor: ed_att_new_center
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
coeffs_csmoke_all <- coeffs_csmoke_all[seq_along(p_fixed), ]
coeffs_csmoke_ed_att_new <- dplyr::mutate(coeffs_csmoke_all, p.value = p_fixed)
write.csv(coeffs_csmoke_ed_att_new, "ed_DHS_csmoke_ed_att_new.csv")

# Extra predictor: urban_prop_center
# (a stray doubled "+" after urban_prop_center was removed from this formula;
# the set of terms is unchanged)
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
coeffs_csmoke_all <- coeffs_csmoke_all[seq_along(p_fixed), ]
coeffs_csmoke_urban_prop <- dplyr::mutate(coeffs_csmoke_all, p.value = p_fixed)
write.csv(coeffs_csmoke_urban_prop, "ed_DHS_csmoke_urban_prop.csv")

# Extra predictor: literacy_rate_female_center
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
coeffs_csmoke_all <- coeffs_csmoke_all[seq_along(p_fixed), ]
coeffs_csmoke_literacy_rate_female <- dplyr::mutate(
  coeffs_csmoke_all,
  p.value = p_fixed
)
write.csv(
  coeffs_csmoke_literacy_rate_female,
  "ed_DHS_csmoke_literacy_rate_female.csv"
)



```

###### Multilevel Model: wealth interaction (quintiles computed nationally)


```{r select variables for multilevel modeling }

# Build the modelling data set: keep only the grouping variable, the outcome
# indicators, the individual-level covariates and the district-level
# predictors used by the multilevel models below.
multilevel <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_broad_ind_dbl,
    ex_htn_narrow_ind_dbl, ex_htn_broad_ind_dbl,
    bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop, PCI_districts,
    literacy_rate_female, ed_att_new, medianai,
    ex_htn_broad_ind
  )

# Mixed-model tooling, attached here rather than at the top of the file.
library(broom.mixed) # tidy() for mixed models (coefficient extraction)
library(lme4)        # multi-level modelling
library(lmerTest)    # p-values for lmer fits

```

```{r convert categorical variables with >1 level into binary variables & perform grand mean centering for continuous variables}

##################### Individual-level variables
# Every indicator below is 1 when the source column equals the given level,
# 0 when it equals another level, and NA when the source value is missing
# (a comparison with NA is itself NA, so no explicit is.na() guard is needed).

# Age group: one indicator per 5-year band.
multilevel <- dplyr::mutate(
  multilevel,
  agr_15_19 = as.numeric(age_grp == "15-19"),
  agr_20_24 = as.numeric(age_grp == "20-24"),
  agr_25_29 = as.numeric(age_grp == "25-29"),
  agr_30_34 = as.numeric(age_grp == "30-34"),
  agr_35_39 = as.numeric(age_grp == "35-39"),
  agr_40_44 = as.numeric(age_grp == "40-44"),
  agr_45_49 = as.numeric(age_grp == "45-49"),
  agr_50_54 = as.numeric(age_grp == "50-54")
)

# National household wealth quintile: one indicator per quintile.
# NOTE(review): the "_n" (national) indicators are derived from
# wealth_quintile_rurb -- confirm that this column holds the nationally
# computed quintile.
multilevel <- dplyr::mutate(
  multilevel,
  wq_1_n = as.numeric(wealth_quintile_rurb == 1),
  wq_2_n = as.numeric(wealth_quintile_rurb == 2),
  wq_3_n = as.numeric(wealth_quintile_rurb == 3),
  wq_4_n = as.numeric(wealth_quintile_rurb == 4),
  wq_5_n = as.numeric(wealth_quintile_rurb == 5)
)

# Sex and urban: copied unchanged. Despite the "_center" suffix no centering
# is applied here.
multilevel <- dplyr::mutate(
  multilevel,
  sex_center = sex,
  urban_center = urban
)

# Education: indicators for ed_5 codes 1, 3, 4, 5 and 6.
# NOTE(review): no indicator is built for code 2 -- confirm that code 2 does
# not occur in ed_5.
multilevel <- dplyr::mutate(
  multilevel,
  ed_1_o = as.numeric(ed_5 == 1),
  ed_3_o = as.numeric(ed_5 == 3),
  ed_4_o = as.numeric(ed_5 == 4),
  ed_5_o = as.numeric(ed_5 == 5),
  ed_6_o = as.numeric(ed_5 == 6)
)

# Check: inspect one education indicator and the source column.
summary(as.factor(multilevel$ed_4_o))
class(multilevel$ed_4_o)
summary(multilevel$ed_5)


###################### District-level variables
# Continuous predictors are centred on their mean and divided by two standard
# deviations. Mean and SD are computed over all rows of `multilevel`, ignoring
# missing values.

# Centre x on its mean and scale it by two standard deviations.
scale_2sd <- function(x) {
  (x - mean(x, na.rm = TRUE)) / (2 * sd(x, na.rm = TRUE))
}

# PCI_districts is coerced to numeric before it is scaled.
multilevel$PCI_districts <- as.numeric(multilevel$PCI_districts)

multilevel <- dplyr::mutate(
  multilevel,
  medianai_center = scale_2sd(medianai),                        # median wealth
  urban_prop_center = scale_2sd(urban_prop),                    # urban proportion
  ed_att_new_center = scale_2sd(ed_att_new),                    # educational attainment
  PCI_district_center = scale_2sd(PCI_districts),               # PCI
  literacy_rate_female_center = scale_2sd(literacy_rate_female) # female literacy rate
)




```

```{r level 1 national wealth quintiles}




# Models without an extra predictor: each outcome is regressed on the
# age-group dummies, the wealth-quintile dummies wq_2_n..wq_5_n, urban_center
# and sex_center, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:14, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# Outcome: ex_diab_broad_ind_dbl
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_1 <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
# NOTE(review): this file was previously written as
# "DHS_nat_wq_nat_wq_diab_1.csv" (doubled prefix, unlike the htn/bmi/csmoke
# files below); update any reader that still expects the old name.
write.csv(coeffs_diab_1, "DHS_nat_wq_diab_1.csv")

# Outcome: ex_htn_broad_ind_dbl
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_1 <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_1, "DHS_nat_wq_htn_1.csv")

# Outcome: bmigrt27.5_dbl
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_1 <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_1, "DHS_nat_wq_bmi_1.csv")

# Outcome: csmoke_dbl
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
coeffs_csmoke_all <- coeffs_csmoke_all[seq_along(p_fixed), ]
coeffs_csmoke_1 <- dplyr::mutate(coeffs_csmoke_all, p.value = p_fixed)
write.csv(coeffs_csmoke_1, "DHS_nat_wq_csmoke_1.csv")




```










```{r lmer analysis diabetes national wealth quintiles}


# ex_diab_broad_ind_dbl ~ age-group dummies + wq_2_n..wq_5_n + urban_center
# + sex_center + one district-level predictor and its interactions with
# wq_2_n..wq_5_n, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:19, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# District predictor: medianai_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2_n + medianai_center:wq_3_n +
    medianai_center:wq_4_n + medianai_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_medi <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_medi, "DHS_nat_wq_diab_medianai_center.csv")

# District predictor: PCI_district_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2_n + PCI_district_center:wq_3_n +
    PCI_district_center:wq_4_n + PCI_district_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_pci <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_pci, "DHS_nat_wq_diab_pci.csv")

# District predictor: ed_att_new_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2_n + ed_att_new_center:wq_3_n +
    ed_att_new_center:wq_4_n + ed_att_new_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_ed_att_new <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_ed_att_new, "DHS_nat_wq_diab_ed_att_new.csv")

# District predictor: urban_prop_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2_n + urban_prop_center:wq_3_n +
    urban_prop_center:wq_4_n + urban_prop_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_urban_prop <- dplyr::mutate(coeffs_diab_all, p.value = p_fixed)
write.csv(coeffs_diab_urban_prop, "DHS_nat_wq_diab_urban_prop.csv")

# District predictor: literacy_rate_female_center
diab_all_lmer <- lmer(
  formula = ex_diab_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2_n + literacy_rate_female_center:wq_3_n +
    literacy_rate_female_center:wq_4_n + literacy_rate_female_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
coeffs_diab_all <- coeffs_diab_all[seq_along(p_fixed), ]
coeffs_diab_literacy_rate_female <- dplyr::mutate(
  coeffs_diab_all,
  p.value = p_fixed
)
write.csv(
  coeffs_diab_literacy_rate_female,
  "DHS_nat_wq_diab_literacy_rate_female.csv"
)





```





```{r lmer analysis hypertension national wealth quintiles}


# ex_htn_broad_ind_dbl ~ age-group dummies + wq_2_n..wq_5_n + urban_center
# + sex_center + one district-level predictor and its interactions with
# wq_2_n..wq_5_n, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:19, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# District predictor: medianai_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2_n + medianai_center:wq_3_n +
    medianai_center:wq_4_n + medianai_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_medi <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_medi, "DHS_nat_wq_htn_medianai_center.csv")

# District predictor: PCI_district_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2_n + PCI_district_center:wq_3_n +
    PCI_district_center:wq_4_n + PCI_district_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_pci <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_pci, "DHS_nat_wq_htn_pci.csv")

# District predictor: ed_att_new_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2_n + ed_att_new_center:wq_3_n +
    ed_att_new_center:wq_4_n + ed_att_new_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_ed_att_new <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_ed_att_new, "DHS_nat_wq_htn_ed_att_new.csv")

# District predictor: urban_prop_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2_n + urban_prop_center:wq_3_n +
    urban_prop_center:wq_4_n + urban_prop_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_urban_prop <- dplyr::mutate(coeffs_htn_all, p.value = p_fixed)
write.csv(coeffs_htn_urban_prop, "DHS_nat_wq_htn_urban_prop.csv")

# District predictor: literacy_rate_female_center
htn_all_lmer <- lmer(
  formula = ex_htn_broad_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2_n + literacy_rate_female_center:wq_3_n +
    literacy_rate_female_center:wq_4_n + literacy_rate_female_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
coeffs_htn_all <- coeffs_htn_all[seq_along(p_fixed), ]
coeffs_htn_literacy_rate_female <- dplyr::mutate(
  coeffs_htn_all,
  p.value = p_fixed
)
write.csv(
  coeffs_htn_literacy_rate_female,
  "DHS_nat_wq_htn_literacy_rate_female.csv"
)




```

```{r lmer analysis obesity national wealth quintiles}


# bmigrt27.5_dbl ~ age-group dummies + wq_2_n..wq_5_n + urban_center
# + sex_center + one district-level predictor and its interactions with
# wq_2_n..wq_5_n, with a random intercept for ex_d_name_ind.
# Each model is fitted, tidied with Wald confidence intervals, reduced to its
# fixed-effect rows, given the p-values from the "Pr(>|t|)" column of the
# summary coefficient table, and written to CSV.
# The fixed-effect row count comes from the fitted model instead of a
# hard-coded 1:19, and the p-value column is selected by name instead of by
# position; results are unchanged whenever the original indices were right.
# Assumes lmer() is lmerTest's version (the source of "Pr(>|t|)") and that
# tidy() lists fixed effects first, as the original positional code also did.

# District predictor: medianai_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2_n + medianai_center:wq_3_n +
    medianai_center:wq_4_n + medianai_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_medi <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_medi, "DHS_nat_wq_bmi_medianai_center.csv")

# District predictor: PCI_district_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2_n + PCI_district_center:wq_3_n +
    PCI_district_center:wq_4_n + PCI_district_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_pci <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_pci, "DHS_nat_wq_bmi_pci.csv")

# District predictor: ed_att_new_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2_n + ed_att_new_center:wq_3_n +
    ed_att_new_center:wq_4_n + ed_att_new_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_ed_att_new <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_ed_att_new, "DHS_nat_wq_bmi_ed_att_new.csv")

# District predictor: urban_prop_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2_n + urban_prop_center:wq_3_n +
    urban_prop_center:wq_4_n + urban_prop_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_urban_prop <- dplyr::mutate(coeffs_bmi_all, p.value = p_fixed)
write.csv(coeffs_bmi_urban_prop, "DHS_nat_wq_bmi_urban_prop.csv")

# District predictor: literacy_rate_female_center
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 + agr_45_49 +
    agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2_n + literacy_rate_female_center:wq_3_n +
    literacy_rate_female_center:wq_4_n + literacy_rate_female_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
p_fixed <- coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
coeffs_bmi_all <- coeffs_bmi_all[seq_along(p_fixed), ]
coeffs_bmi_literacy_rate_female <- dplyr::mutate(
  coeffs_bmi_all,
  p.value = p_fixed
)
write.csv(
  coeffs_bmi_literacy_rate_female,
  "DHS_nat_wq_bmi_literacy_rate_female.csv"
)




```



```{r lmer analysis currently smoking}


# median center
#
# Each section below fits a linear mixed model for csmoke_dbl with a random
# intercept per ex_d_name_ind, interacts one district-level covariate with the
# wq_*_n indicators, and exports the fixed-effect table as CSV.

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2_n + medianai_center:wq_3_n +
    medianai_center:wq_4_n + medianai_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row range, so the table stays correct if the number of terms changes.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Set p.value from the summary() coefficient table, addressed by column name.
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "DHS_nat_wq_csmoke_medianai_center.csv")


# PCI

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2_n + PCI_district_center:wq_3_n +
    PCI_district_center:wq_4_n + PCI_district_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "DHS_nat_wq_csmoke_pci.csv")


## educational attainment

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2_n + ed_att_new_center:wq_3_n +
    ed_att_new_center:wq_4_n + ed_att_new_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "DHS_nat_wq_csmoke_ed_att_new.csv")


## urban prop

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2_n + urban_prop_center:wq_3_n +
    urban_prop_center:wq_4_n + urban_prop_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "DHS_nat_wq_csmoke_urban_prop.csv")


# Female literacy rate

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2_n + wq_3_n + wq_4_n + wq_5_n +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2_n + literacy_rate_female_center:wq_3_n +
    literacy_rate_female_center:wq_4_n + literacy_rate_female_center:wq_5_n +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_literacy_rate_female, "DHS_nat_wq_csmoke_literacy_rate_female.csv")




```





# Multilevel Model: currently smoking stratified by gender

##### female
```{r select variables for multilevel modeling csmoke female}

# Build the modelling data set: keep the district identifier, the outcome
# columns and the individual- and district-level covariates, then restrict to
# rows with sex == 1 (the stratum this section's heading labels "female").
multilevel <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_broad_ind_dbl,
    ex_htn_narrow_ind_dbl, ex_htn_broad_ind_dbl,
    bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    hh_wealth_quintile_district, hh_wealth_quintile_district_c,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop, PCI_districts,
    literacy_rate_female, ed_att_new, medianai
  ) %>%
  dplyr::filter(sex == 1)

library(broom.mixed) # tidy() for mixed models (coefficient extraction)
library(lme4)
library(lmerTest) # loaded after lme4
```

```{r convert categorical variables with >1 level into binary variables & perform grand mean centering for continuous variables (csmoke female)}

##################### individual-level variables

# age_group: one 0/1 indicator per age band; NA where age_grp is missing.
multilevel <- multilevel %>%
  mutate(
    agr_15_19 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "15-19", 1, 0)),
    agr_20_24 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "20-24", 1, 0)),
    agr_25_29 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "25-29", 1, 0)),
    agr_30_34 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "30-34", 1, 0)),
    agr_35_39 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "35-39", 1, 0)),
    agr_40_44 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "40-44", 1, 0)),
    agr_45_49 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "45-49", 1, 0)),
    agr_50_54 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "50-54", 1, 0))
  )


# household_wealth_quintile: one 0/1 indicator per value 1-5 of
# hh_wealth_quintile_district; NA where the quintile is missing.
multilevel <- multilevel %>%
  mutate(
    wq_1 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 1, 1, 0)),
    wq_2 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 2, 1, 0)),
    wq_3 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 3, 1, 0)),
    wq_4 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 4, 1, 0)),
    wq_5 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 5, 1, 0))
  )


# sex: copied unchanged under the *_center name (no centering is applied here).
multilevel <- multilevel %>%
  mutate(sex_center = sex)


# urban: copied unchanged under the *_center name used by the model formulas.
multilevel <- multilevel %>%
  mutate(urban_center = urban)


# education: 0/1 indicators for ed_5 values 1, 3, 4, 5 and 6; NA where ed_5 is
# missing.
multilevel <- multilevel %>%
  mutate(
    ed_1_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 1, 1, 0)),
    ed_3_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 3, 1, 0)),
    ed_4_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 4, 1, 0)),
    ed_5_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 5, 1, 0)),
    ed_6_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 6, 1, 0))
  )


###################### District-level variables
# Each continuous covariate is centred on its mean and divided by two standard
# deviations; mean and sd are computed over the rows of `multilevel`, ignoring
# missing values.

# median wealth # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    medianai_center = (medianai - mean(medianai, na.rm = TRUE)) /
      (2 * sd(medianai, na.rm = TRUE))
  )


## urban_prop # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    urban_prop_center = (urban_prop - mean(urban_prop, na.rm = TRUE)) /
      (2 * sd(urban_prop, na.rm = TRUE))
  )


# educational attainment new # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    ed_att_new_center = (ed_att_new - mean(ed_att_new, na.rm = TRUE)) /
      (2 * sd(ed_att_new, na.rm = TRUE))
  )


## PCI_districts # continuous -> scaled
# Coerce to numeric first so mean() and sd() can be computed.
multilevel$PCI_districts <- as.numeric(multilevel$PCI_districts)
multilevel <- multilevel %>%
  mutate(
    PCI_district_center = (PCI_districts - mean(PCI_districts, na.rm = TRUE)) /
      (2 * sd(PCI_districts, na.rm = TRUE))
  )


# literacy rate female # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    literacy_rate_female_center =
      (literacy_rate_female - mean(literacy_rate_female, na.rm = TRUE)) /
        (2 * sd(literacy_rate_female, na.rm = TRUE))
  )


```

```{r lmer analysis csmoke female wealth (quintiles computed for each district)}




# csmoke level1
#
# Models in this chunk: csmoke_dbl on the age-band, wealth-quintile (wq_2..wq_5)
# and urban indicators with a random intercept per ex_d_name_ind; later
# sections add one district-level covariate and its interactions with the
# wealth quintiles. No agr_50_54 term appears in these formulas.
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row range, so the table stays correct if the number of terms changes.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Set p.value from the summary() coefficient table, addressed by column name.
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_1, "female_DHS_csmoke_1.csv")


# median center

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "female_DHS_csmoke_medianai_center.csv")


# PCI

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "female_DHS_csmoke_pci.csv")


## educational attainment

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "female_DHS_csmoke_ed_att_new.csv")


## urban prop

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "female_DHS_csmoke_urban_prop.csv")


# Female literacy rate

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_literacy_rate_female, "female_DHS_csmoke_literacy_rate_female.csv")



```
```{r lmer analysis csmoke female education}


# level1
#
# Models in this chunk: csmoke_dbl on the age-band, education (ed_3_o..ed_6_o)
# and urban indicators with a random intercept per ex_d_name_ind; later
# sections add one district-level covariate and its interactions with the
# education indicators. No agr_50_54 term appears in these formulas.
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row range, so the table stays correct if the number of terms changes.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Set p.value from the summary() coefficient table, addressed by column name.
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_1, "female_ed_DHS_csmoke_1.csv")


# median center

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "female_ed_DHS_csmoke_medianai_center.csv")


# PCI

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "female_ed_DHS_csmoke_pci.csv")


## educational attainment

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "female_ed_DHS_csmoke_ed_att_new.csv")


## urban prop

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "female_ed_DHS_csmoke_urban_prop.csv")


# Female literacy rate

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

# Convergence warning: refit with allFit() to see whether it is a false
# positive (following the lme4 help page on convergence).
csmoke_all_lmer.all <- allFit(csmoke_all_lmer)
ss <- summary(csmoke_all_lmer.all)
ss$fixef    ## fixed effects
ss$llik     ## log-likelihoods
ss$sdcor    ## SDs and correlations
ss$theta    ## Cholesky factors
ss$which.OK ## which fits worked

# "If all optimizers converge to values that are practically equivalent, then
# we would consider the convergence warnings to be false positives." -> the
# author judged that they do, so the warning is treated as a false positive.

write.csv(coeffs_csmoke_literacy_rate_female, "female_ed_DHS_csmoke_literacy_rate_female.csv")




```
##### male
```{r select variables for multilevel modeling csmoke male}

# Build the modelling data set: keep the district identifier, the outcome
# columns and the individual- and district-level covariates, then restrict to
# rows with sex == 0 (the stratum this section's heading labels "male").
multilevel <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_broad_ind_dbl,
    ex_htn_narrow_ind_dbl, ex_htn_broad_ind_dbl,
    bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    hh_wealth_quintile_district, hh_wealth_quintile_district_c,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop, PCI_districts,
    literacy_rate_female, ed_att_new, medianai
  ) %>%
  dplyr::filter(sex == 0)

library(broom.mixed) # tidy() for mixed models (coefficient extraction)
library(lme4)
library(lmerTest) # loaded after lme4
```

```{r convert categorical variables with >1 level into binary variables & perform grand mean centering for continuous variables (csmoke male)}

##################### individual-level variables

# age_group: one 0/1 indicator per age band; NA where age_grp is missing.
multilevel <- multilevel %>%
  mutate(
    agr_15_19 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "15-19", 1, 0)),
    agr_20_24 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "20-24", 1, 0)),
    agr_25_29 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "25-29", 1, 0)),
    agr_30_34 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "30-34", 1, 0)),
    agr_35_39 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "35-39", 1, 0)),
    agr_40_44 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "40-44", 1, 0)),
    agr_45_49 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "45-49", 1, 0)),
    agr_50_54 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "50-54", 1, 0))
  )


# household_wealth_quintile: one 0/1 indicator per value 1-5 of
# hh_wealth_quintile_district; NA where the quintile is missing.
multilevel <- multilevel %>%
  mutate(
    wq_1 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 1, 1, 0)),
    wq_2 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 2, 1, 0)),
    wq_3 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 3, 1, 0)),
    wq_4 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 4, 1, 0)),
    wq_5 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 5, 1, 0))
  )


# sex: copied unchanged under the *_center name (no centering is applied here).
multilevel <- multilevel %>%
  mutate(sex_center = sex)


# urban: copied unchanged under the *_center name used by the model formulas.
multilevel <- multilevel %>%
  mutate(urban_center = urban)


# education: 0/1 indicators for ed_5 values 1, 3, 4, 5 and 6; NA where ed_5 is
# missing.
multilevel <- multilevel %>%
  mutate(
    ed_1_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 1, 1, 0)),
    ed_3_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 3, 1, 0)),
    ed_4_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 4, 1, 0)),
    ed_5_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 5, 1, 0)),
    ed_6_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 6, 1, 0))
  )


###################### District-level variables
# Each continuous covariate is centred on its mean and divided by two standard
# deviations; mean and sd are computed over the rows of `multilevel`, ignoring
# missing values.

# median wealth # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    medianai_center = (medianai - mean(medianai, na.rm = TRUE)) /
      (2 * sd(medianai, na.rm = TRUE))
  )


## urban_prop # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    urban_prop_center = (urban_prop - mean(urban_prop, na.rm = TRUE)) /
      (2 * sd(urban_prop, na.rm = TRUE))
  )


# educational attainment new # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    ed_att_new_center = (ed_att_new - mean(ed_att_new, na.rm = TRUE)) /
      (2 * sd(ed_att_new, na.rm = TRUE))
  )


## PCI_districts # continuous -> scaled
# Coerce to numeric first so mean() and sd() can be computed.
multilevel$PCI_districts <- as.numeric(multilevel$PCI_districts)
multilevel <- multilevel %>%
  mutate(
    PCI_district_center = (PCI_districts - mean(PCI_districts, na.rm = TRUE)) /
      (2 * sd(PCI_districts, na.rm = TRUE))
  )


# literacy rate female # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    literacy_rate_female_center =
      (literacy_rate_female - mean(literacy_rate_female, na.rm = TRUE)) /
        (2 * sd(literacy_rate_female, na.rm = TRUE))
  )


```

```{r lmer analysis csmoke male wealth (quintiles computed for each district)}



# csmoke level1
#
# Models in this chunk: csmoke_dbl on the age-band, wealth-quintile (wq_2..wq_5)
# and urban indicators with a random intercept per ex_d_name_ind; later
# sections add one district-level covariate and its interactions with the
# wealth quintiles.
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row range, so the table stays correct if the number of terms changes.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Set p.value from the summary() coefficient table, addressed by column name.
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_1, "male_DHS_csmoke_1.csv")


# median center

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "male_DHS_csmoke_medianai_center.csv")


# PCI

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "male_DHS_csmoke_pci.csv")


## educational attainment

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "male_DHS_csmoke_ed_att_new.csv")


## urban prop

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "male_DHS_csmoke_urban_prop.csv")


# Female literacy rate

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_literacy_rate_female, "male_DHS_csmoke_literacy_rate_female.csv")



```



```{r lmer analysis csmoke male education}

# level1
#
# Models in this chunk: csmoke_dbl on the age-band, education (ed_3_o..ed_6_o)
# and urban indicators with a random intercept per ex_d_name_ind; later
# sections add one district-level covariate and its interactions with the
# education indicators.
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row range, so the table stays correct if the number of terms changes.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Set p.value from the summary() coefficient table, addressed by column name.
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_1, "male_ed_DHS_csmoke_1.csv")


# median center

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "male_ed_DHS_csmoke_medianai_center.csv")


# PCI

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "male_ed_DHS_csmoke_pci.csv")


## educational attainment

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "male_ed_DHS_csmoke_ed_att_new.csv")


## urban prop

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "male_ed_DHS_csmoke_urban_prop.csv")


# Female literacy rate

csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_literacy_rate_female, "male_ed_DHS_csmoke_literacy_rate_female.csv")




```










# Multilevel Model: Analyses using narrow definition of Diabetes and Hypertension

```{r select variables for multilevel modeling narrow definitions}

# Build the modelling data set for the narrow-definition analyses: keep the
# district identifier, the outcome columns and the individual- and
# district-level covariates (no filtering by sex in this section).
multilevel <- India_DHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_broad_ind_dbl,
    ex_htn_narrow_ind_dbl, ex_htn_broad_ind_dbl,
    bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    hh_wealth_quintile_district, hh_wealth_quintile_district_c,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop, PCI_districts,
    literacy_rate_female, ed_att_new, medianai,
    hh_wealth_quintile_groups_district
  )

library(broom.mixed) # tidy() for mixed models (coefficient extraction)
library(lme4)
library(lmerTest) # loaded after lme4
```

```{r convert categorical variables with >1 level into binary variables & perform grand mean centering for continuous variables (narrow definitions)}

##################### individual-level variables

# age_group: one 0/1 indicator per age band; NA where age_grp is missing.
multilevel <- multilevel %>%
  mutate(
    agr_15_19 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "15-19", 1, 0)),
    agr_20_24 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "20-24", 1, 0)),
    agr_25_29 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "25-29", 1, 0)),
    agr_30_34 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "30-34", 1, 0)),
    agr_35_39 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "35-39", 1, 0)),
    agr_40_44 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "40-44", 1, 0)),
    agr_45_49 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "45-49", 1, 0)),
    agr_50_54 = ifelse(is.na(age_grp), NA, ifelse(age_grp == "50-54", 1, 0))
  )


# household_wealth_quintile: one 0/1 indicator per value 1-5 of
# hh_wealth_quintile_district; NA where the quintile is missing.
multilevel <- multilevel %>%
  mutate(
    wq_1 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 1, 1, 0)),
    wq_2 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 2, 1, 0)),
    wq_3 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 3, 1, 0)),
    wq_4 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 4, 1, 0)),
    wq_5 = ifelse(is.na(hh_wealth_quintile_district), NA, ifelse(hh_wealth_quintile_district == 5, 1, 0))
  )

# check: indicator counts, its class, and the source column's summary
summary(as.factor(multilevel$wq_1))
class(multilevel$wq_1)
summary(multilevel$hh_wealth_quintile_district)


# sex: copied unchanged under the *_center name (no centering is applied here).
multilevel <- multilevel %>%
  mutate(sex_center = sex)


# urban: copied unchanged under the *_center name used by the model formulas.
multilevel <- multilevel %>%
  mutate(urban_center = urban)


# education: 0/1 indicators for ed_5 values 1, 3, 4, 5 and 6; NA where ed_5 is
# missing.
multilevel <- multilevel %>%
  mutate(
    ed_1_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 1, 1, 0)),
    ed_3_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 3, 1, 0)),
    ed_4_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 4, 1, 0)),
    ed_5_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 5, 1, 0)),
    ed_6_o = ifelse(is.na(ed_5), NA, ifelse(ed_5 == 6, 1, 0))
  )

# check: indicator counts, its class, and the source column's summary
summary(as.factor(multilevel$ed_4_o))
class(multilevel$ed_4_o)
summary(multilevel$ed_5)


###################### District-level variables
# Each continuous covariate is centred on its mean and divided by two standard
# deviations; mean and sd are computed over the rows of `multilevel`, ignoring
# missing values.

# median wealth # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    medianai_center = (medianai - mean(medianai, na.rm = TRUE)) /
      (2 * sd(medianai, na.rm = TRUE))
  )


## urban_prop # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    urban_prop_center = (urban_prop - mean(urban_prop, na.rm = TRUE)) /
      (2 * sd(urban_prop, na.rm = TRUE))
  )


# educational attainment new # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    ed_att_new_center = (ed_att_new - mean(ed_att_new, na.rm = TRUE)) /
      (2 * sd(ed_att_new, na.rm = TRUE))
  )


## PCI_districts # continuous -> scaled
# Coerce to numeric first so mean() and sd() can be computed.
multilevel$PCI_districts <- as.numeric(multilevel$PCI_districts)
multilevel <- multilevel %>%
  mutate(
    PCI_district_center = (PCI_districts - mean(PCI_districts, na.rm = TRUE)) /
      (2 * sd(PCI_districts, na.rm = TRUE))
  )


# literacy rate female # continuous -> scaled
multilevel <- multilevel %>%
  mutate(
    literacy_rate_female_center =
      (literacy_rate_female - mean(literacy_rate_female, na.rm = TRUE)) /
        (2 * sd(literacy_rate_female, na.rm = TRUE))
  )




```

```{r  lmer analysis high blood glucose}



#### wealth (quintiles computed for each district) ####
#
# Models in this part: ex_diab_narrow_ind_dbl on the age-band, wealth-quintile
# (wq_2..wq_5), urban and sex indicators with a random intercept per
# ex_d_name_ind; later sections add one district-level covariate and its
# interactions with the wealth quintiles.

# level 1
diab_all_lmer <- lmer(
  formula = ex_diab_narrow_ind_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row range, so the table stays correct if the number of terms changes.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Set p.value from the summary() coefficient table, addressed by column name.
coeffs_diab_1 <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_1, "DHS_diab_1_narrow.csv")


# median center

diab_all_lmer <- lmer(
  formula = ex_diab_narrow_ind_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_medi <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_medi, "DHS_diab_medianai_center_narrow.csv")


# PCI

diab_all_lmer <- lmer(
  formula = ex_diab_narrow_ind_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_pci <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_pci, "DHS_diab_pci_narrow.csv")


## educational attainment

diab_all_lmer <- lmer(
  formula = ex_diab_narrow_ind_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_ed_att_new <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_ed_att_new, "DHS_diab_ed_att_new_narrow.csv")


## urban prop

diab_all_lmer <- lmer(
  formula = ex_diab_narrow_ind_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_urban_prop <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_urban_prop, "DHS_diab_urban_prop_narrow.csv")




#Female literacy rate
 
diab_all_lmer<-lmer(formula =ex_diab_narrow_ind_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+wq_2+wq_3+wq_4+wq_5+urban_center+sex_center+literacy_rate_female_center+literacy_rate_female_center:wq_2+literacy_rate_female_center:wq_3+literacy_rate_female_center:wq_4+literacy_rate_female_center:wq_5+ (1|ex_d_name_ind), data=multilevel)

coeffs_diab_all <- diab_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_diab_all <-coeffs_diab_all[1:19,]
coeffs_diab_literacy_rate_female<-mutate(coeffs_diab_all,p.value=coef(summary(diab_all_lmer))[,5])

write.csv(coeffs_diab_literacy_rate_female,"DHS_diab_literacy_rate_female_narrow.csv")





####education####


#level 1
diab_all_lmer<-lmer(formula =ex_diab_narrow_ind_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+ed_3_o+ed_4_o+ed_5_o+ed_6_o+urban_center+sex_center+(1|ex_d_name_ind), data=multilevel)

coeffs_diab_all <- diab_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_diab_all <-coeffs_diab_all[1:14 ,]
coeffs_diab_1<-mutate(coeffs_diab_all,p.value=coef(summary(diab_all_lmer))[,5])

write.csv(coeffs_diab_1,"ed_DHS_diab_1_narrow.csv")


#median center

diab_all_lmer<-lmer(formula =ex_diab_narrow_ind_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+ed_3_o+ed_4_o+ed_5_o+ed_6_o+urban_center+sex_center+medianai_center+ medianai_center:ed_3_o+medianai_center:ed_4_o+medianai_center:ed_5_o+medianai_center:ed_6_o+ (1|ex_d_name_ind), data=multilevel)

coeffs_diab_all <- diab_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_diab_all <-coeffs_diab_all[1:19 ,]
coeffs_diab_medi<-mutate(coeffs_diab_all,p.value=coef(summary(diab_all_lmer))[,5])

write.csv(coeffs_diab_medi,"ed_DHS_diab_medianai_center_narrow.csv")



#PCI

diab_all_lmer<-lmer(formula =ex_diab_narrow_ind_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+ed_3_o+ed_4_o+ed_5_o+ed_6_o+urban_center+sex_center+PCI_district_center+  PCI_district_center:ed_3_o+PCI_district_center:ed_4_o+PCI_district_center:ed_5_o+PCI_district_center:ed_6_o+ (1|ex_d_name_ind), data=multilevel)

coeffs_diab_all <- diab_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_diab_all <-coeffs_diab_all[1:19 ,]
coeffs_diab_pci<-mutate(coeffs_diab_all,p.value=coef(summary(diab_all_lmer))[,5])

write.csv(coeffs_diab_pci,"ed_DHS_diab_pci_narrow.csv")




##educational attainment 

diab_all_lmer<-lmer(formula =ex_diab_narrow_ind_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+ed_3_o+ed_4_o+ed_5_o+ed_6_o+urban_center+sex_center+ed_att_new_center+ ed_att_new_center:ed_3_o+ed_att_new_center:ed_4_o+ ed_att_new_center:ed_5_o+ed_att_new_center:ed_6_o+ (1|ex_d_name_ind), data=multilevel)

coeffs_diab_all <- diab_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_diab_all <-coeffs_diab_all[1:19,]
coeffs_diab_ed_att_new<-mutate(coeffs_diab_all,p.value=coef(summary(diab_all_lmer))[,5])

write.csv(coeffs_diab_ed_att_new,"ed_DHS_diab_ed_att_new_narrow.csv")




##urban prop

diab_all_lmer<-lmer(formula =ex_diab_narrow_ind_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+ed_3_o+ed_4_o+ed_5_o+ed_6_o+urban_center+sex_center+urban_prop_center+ urban_prop_center:ed_3_o+urban_prop_center:ed_4_o+urban_prop_center:ed_5_o+urban_prop_center:ed_6_o+ (1|ex_d_name_ind), data=multilevel)

coeffs_diab_all <- diab_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_diab_all <-coeffs_diab_all[1:19 ,]
coeffs_diab_urban_prop<-mutate(coeffs_diab_all,p.value=coef(summary(diab_all_lmer))[,5])

write.csv(coeffs_diab_urban_prop,"ed_DHS_diab_urban_prop_narrow.csv")




#Female literacy rate
 
diab_all_lmer<-lmer(formula =ex_diab_narrow_ind_dbl~agr_20_24+agr_25_29+ agr_30_34+agr_35_39+agr_40_44+agr_45_49+agr_50_54+ed_3_o+ed_4_o+ed_5_o+ed_6_o+urban_center+sex_center+literacy_rate_female_center+literacy_rate_female_center:ed_3_o+literacy_rate_female_center:ed_4_o+literacy_rate_female_center:ed_5_o+literacy_rate_female_center:ed_6_o+ (1|ex_d_name_ind), data=multilevel)

coeffs_diab_all <- diab_all_lmer%>% tidy(conf.int=TRUE,conf.method="Wald")
coeffs_diab_all <-coeffs_diab_all[1:19 ,]
coeffs_diab_literacy_rate_female<-mutate(coeffs_diab_all,p.value=coef(summary(diab_all_lmer))[,5])

write.csv(coeffs_diab_literacy_rate_female,"ed_DHS_diab_literacy_rate_female_narrow.csv")




```





```{r  lmer analysis high blood pressure }



# Linear mixed models for ex_htn_narrow_ind_dbl with a random intercept for
# ex_d_name_ind. Every model adjusts for the age-group dummies, urban_center
# and sex_center. The first section uses the wq_* dummies as the individual
# socio-economic measure, the second the ed_*_o dummies. After the level-1
# model, each further model adds one *_center predictor plus its interactions
# with the socio-economic dummies. One CSV of fixed effects is written per
# model.

# Build the fixed-effects coefficient table of a fitted lmer model.
#
# model: fitted mixed model whose summary() coefficient matrix contains a
#        "Pr(>|t|)" column (this requires the lmerTest version of lmer()).
# Returns the tidy() rows of the fixed effects, with Wald confidence
# intervals, and `p.value` set from the model summary.
#
# Defined in this chunk so the chunk can be run on its own.
fixed_effects_table <- function(model) {
  fixed_summary <- coef(summary(model))
  if (!"Pr(>|t|)" %in% colnames(fixed_summary)) {
    stop(
      "Model summary has no 'Pr(>|t|)' column; fit the model with ",
      "lmerTest::lmer() so that p-values are available.",
      call. = FALSE
    )
  }
  tidied <- tidy(model, conf.int = TRUE, conf.method = "Wald")
  # This relies on tidy() returning the fixed-effect rows first. The number
  # of rows to keep comes from the model itself rather than a hard-coded
  # count, so it stays correct if lmer() drops a rank-deficient column.
  fixed_rows <- tidied[seq_len(nrow(fixed_summary)), ]
  # Select the p-values by column name instead of by position.
  mutate(fixed_rows, p.value = fixed_summary[, "Pr(>|t|)"])
}


####wealth (quintiles computed for each district)####

# level 1
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_1 <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_1, "DHS_htn_1_narrow.csv")


# median center: medianai_center and its interactions with wq_*
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_medi <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_medi, "DHS_htn_medianai_center_narrow.csv")


# PCI: PCI_district_center and its interactions with wq_*
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_pci <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_pci, "DHS_htn_pci_narrow.csv")


# educational attainment: ed_att_new_center and its interactions with wq_*
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_ed_att_new <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_ed_att_new, "DHS_htn_ed_att_new_narrow.csv")


# urban prop: urban_prop_center and its interactions with wq_*
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_urban_prop <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_urban_prop, "DHS_htn_urban_prop_narrow.csv")


# Female literacy rate: literacy_rate_female_center and its interactions
# with wq_*
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    wq_2 + wq_3 + wq_4 + wq_5 +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_literacy_rate_female <- fixed_effects_table(htn_all_lmer)
write.csv(
  coeffs_htn_literacy_rate_female,
  "DHS_htn_literacy_rate_female_narrow.csv"
)


####education####

# NOTE: the coeffs_htn_* objects from the wealth section are overwritten
# below; only the CSV files keep the wealth results.

# level 1
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_1 <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_1, "ed_DHS_htn_1_narrow.csv")


# median center: medianai_center and its interactions with ed_*_o
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_medi <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_medi, "ed_DHS_htn_medianai_center_narrow.csv")


# PCI: PCI_district_center and its interactions with ed_*_o
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_pci <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_pci, "ed_DHS_htn_pci_narrow.csv")


# educational attainment: ed_att_new_center and its interactions with ed_*_o
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_ed_att_new <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_ed_att_new, "ed_DHS_htn_ed_att_new_narrow.csv")


# urban prop: urban_prop_center and its interactions with ed_*_o
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_urban_prop <- fixed_effects_table(htn_all_lmer)
write.csv(coeffs_htn_urban_prop, "ed_DHS_htn_urban_prop_narrow.csv")


# Female literacy rate: literacy_rate_female_center and its interactions
# with ed_*_o
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 +
    agr_40_44 + agr_45_49 + agr_50_54 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o +
    urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_literacy_rate_female <- fixed_effects_table(htn_all_lmer)
write.csv(
  coeffs_htn_literacy_rate_female,
  "ed_DHS_htn_literacy_rate_female_narrow.csv"
)


```


